/**
 * Validates a parameter whose value is expected to have the form {@code schema.table} against
 * this operator's input tables, recording the parameter name in {@code invalidParameterList}
 * when it does not match.
 *
 * <p>Both the parameter value and each input's schema/table name are first passed through
 * {@code VariableModelUtility.getReplaceValue} (presumably variable substitution - confirm).
 * Nothing is done when the parameter is already listed as invalid or when the resolved value is
 * empty.
 *
 * @param invalidParameterList accumulator of invalid parameter names; may be appended to
 * @param paraName name of the parameter being validated
 * @param paraValue raw parameter value, expected to look like {@code schema.table}
 * @param variableModel model handed to {@code VariableModelUtility.getReplaceValue}
 */
private void validateSelectedTable(
     List<String> invalidParameterList,
     String paraName,
     String paraValue,
     VariableModel variableModel) {
   if (invalidParameterList.contains(paraName)) {
     return;
   }
   paraValue = VariableModelUtility.getReplaceValue(variableModel, paraValue);

   List<String> tableList = new ArrayList<String>();
   // NOTE(review): only the schema of the LAST table input is kept, so inputs living in
   // different schemas are all compared against that single schema - confirm this is intended.
   String schema = null;
   for (Object obj : getOperatorInputList()) {
     if (obj instanceof OperatorInputTableInfo) {
       OperatorInputTableInfo operatorInputTableInfo = (OperatorInputTableInfo) obj;
       tableList.add(
           VariableModelUtility.getReplaceValue(variableModel, operatorInputTableInfo.getTable()));
       schema =
           VariableModelUtility.getReplaceValue(variableModel, operatorInputTableInfo.getSchema());
     }
   }

   if (StringUtil.isEmpty(paraValue)) {
     return;
   }
   String[] parts = paraValue.split("\\.");
   // A value without a "." (or with nothing after it) yields fewer than two parts; treat it as
   // invalid instead of failing with ArrayIndexOutOfBoundsException on parts[1].
   if (parts.length < 2 || !parts[0].equals(schema) || !tableList.contains(parts[1])) {
     invalidParameterList.add(paraName);
   }
 }
 /**
  * Two models are equal when their pig scripts are equal (per {@code StringUtil.safeEquals}) and
  * their pig input map items are equal ignoring order (per {@code ListUtility.equalsIgnoreOrder}).
  *
  * @param obj the object to compare with; {@code null} or an object of another type is never
  *     equal
  * @return {@code true} when {@code obj} is an equal {@code AnalysisPigExecutableModel}
  */
 @Override
 public boolean equals(Object obj) {
   if (this == obj) {
     return true;
   }
   // Guard the cast: equals() must return false, not throw, for null and foreign types.
   if (!(obj instanceof AnalysisPigExecutableModel)) {
     return false;
   }
   AnalysisPigExecutableModel other = (AnalysisPigExecutableModel) obj;
   return StringUtil.safeEquals(pigScript, other.getPigScript())
       && ListUtility.equalsIgnoreOrder(pigInputMapItems, other.getPigInputMapItems());
 }
  /**
   * Trains the grouped linear regression model on {@code dataSet} and returns the {@code model}
   * field with its group-by column set.
   *
   * <p>Steps, in order: the data set is passed through
   * {@code transformer.TransformCategoryToNumeric_new}; column statistics are computed on the
   * transformed data set; {@code getCoefficientAndR2Group} is invoked; per-group degrees of
   * freedom are derived from {@code groupCount}; the SQL built by {@code createSSQLLGroup} is
   * executed to collect one value per group; {@code getVarianceCovarianceMatrixGroup} and
   * {@code caculateStatistics} are invoked; finally, groups with entries in
   * {@code null_list_group} get an error string listing those columns.
   *
   * @param dataSet the input data set
   * @param para training parameters; only {@code getAnalysisInterActionModel()} is read here
   * @param columnNames comma-separated column names forwarded to the transformer; may be
   *     {@code null} or blank
   * @return the {@code model} field
   * @throws OperatorException if the statement cannot be created or any step fails; the message
   *     is the localized message of the underlying exception
   */
  public Model learn(DataSet dataSet, LinearRegressionParameter para, String columnNames)
      throws OperatorException {
    this.dataSet = dataSet;
    ArrayList<String> columnNamesList = new ArrayList<String>();
    if (columnNames != null && !StringUtil.isEmpty(columnNames.trim())) {
      for (String s : columnNames.split(",")) {
        columnNamesList.add(s);
      }
    }
    transformer.setColumnNames(columnNamesList);
    transformer.setAnalysisInterActionModel(para.getAnalysisInterActionModel());
    newDataSet = transformer.TransformCategoryToNumeric_new(dataSet, groupbyColumn);
    DatabaseConnection databaseConnection =
        ((DBTable) newDataSet.getDBTable()).getDatabaseConnection();

    Column label = newDataSet.getColumns().getLabel();
    String labelName = StringHandler.doubleQ(label.getName());
    String tableName = ((DBTable) dataSet.getDBTable()).getTableName();
    String newTableName = ((DBTable) newDataSet.getDBTable()).getTableName();

    try {
      st = databaseConnection.createStatement(false);
    } catch (SQLException e) {
      itsLogger.error(e.getMessage(), e);
      throw new OperatorException(e.getLocalizedMessage());
    }
    try {
      newDataSet.computeAllColumnStatistics();
      Columns atts = newDataSet.getColumns();

      // Snapshot the transformed data set's column names in iteration order.
      String[] columnNamesArray = new String[atts.size()];
      int count = 0;
      Iterator<Column> atts_i = atts.iterator();
      while (atts_i.hasNext()) {
        columnNamesArray[count] = atts_i.next().getName();
        count++;
      }
      null_list = calculateNull(dataSet);
      null_list_group = calculateNullGroup(newDataSet, atts);
      StringBuilder sb_notNull = getWhere(atts);

      getCoefficientAndR2Group(
          columnNames,
          dataSet,
          labelName,
          tableName,
          newTableName,
          atts,
          columnNamesArray,
          sb_notNull);

      // Degrees of freedom per group: row count - number of columns - 1. A non-positive value
      // marks the group's S as NaN; processing still continues for every group.
      HashMap<String, Long> degreeOfFreedom = new HashMap<String, Long>();
      for (String groupValue : groupCount.keySet()) {
        long tempDof = groupCount.get(groupValue) - columnNamesArray.length - 1;
        if (tempDof <= 0) {
          model.getOneModel(groupValue).setS(Double.NaN);
        }
        degreeOfFreedom.put(groupValue, tempDof);
      }

      StringBuffer sSQL =
          createSSQLLGroup(
              newDataSet, newTableName, label, columnNamesArray, coefficients, sb_notNull);
      HashMap<String, Double> sValueMap = new HashMap<String, Double>();

      try {
        itsLogger.debug(classLogInfo + ".learn():sql=" + sSQL);
        rs = st.executeQuery(sSQL.toString());
        while (rs.next()) {
          // Column 2 is the group value, column 1 the numeric result for that group.
          String groupValue = rs.getString(2);
          if (groupValue == null) {
            continue;
          }
          if (dataErrorList.contains(groupValue)) {
            sValueMap.put(groupValue, Double.NaN);
          } else {
            sValueMap.put(groupValue, rs.getDouble(1));
          }
        }
        rs.close();
      } catch (SQLException e) {
        itsLogger.error(e.getMessage(), e);
        throw new OperatorException(e.getLocalizedMessage());
      }

      HashMap<String, Matrix> varianceCovarianceMatrix =
          getVarianceCovarianceMatrixGroup(newTableName, columnNamesArray, st);
      for (String groupValue : varianceCovarianceMatrix.keySet()) {
        // Check the matrix of THIS group. The previous check tested the map itself, which can
        // never be null inside a loop over its own key set, so the singular-matrix message was
        // never reported.
        if (varianceCovarianceMatrix.get(groupValue) == null) {
          model
              .getOneModel(groupValue)
              .setErrorString(
                  AlpineDataAnalysisLanguagePack.getMessage(
                          AlpineDataAnalysisLanguagePack.MATRIX_IS_SIGULAR,
                          AlpineThreadLocal.getLocale())
                      + Tools.getLineSeparator());
        }
      }
      caculateStatistics(
          columnNamesArray,
          coefficients,
          model,
          sValueMap,
          varianceCovarianceMatrix,
          degreeOfFreedom);

      // Report, per group, the columns listed in null_list_group. The localized template is
      // split on ';' and the quoted column list is inserted between its first two parts.
      for (String groupValue : null_list_group.keySet()) {
        if (null_list_group.get(groupValue).size() != 0) {
          StringBuilder sb_null = new StringBuilder();
          for (int i = 0; i < null_list_group.get(groupValue).size(); i++) {
            sb_null
                .append(StringHandler.doubleQ(null_list_group.get(groupValue).get(i)))
                .append(",");
          }
          sb_null = sb_null.deleteCharAt(sb_null.length() - 1);
          String table_exist_null =
              AlpineDataAnalysisLanguagePack.getMessage(
                  AlpineDataAnalysisLanguagePack.TABLE_EXIST_NULL, AlpineThreadLocal.getLocale());
          String[] temp = table_exist_null.split(";");
          model
              .getOneModel(groupValue)
              .setErrorString(temp[0] + sb_null.toString() + temp[1] + Tools.getLineSeparator());
        }
      }
      // NOTE(review): the transformed table is only dropped on the success path; a failure
      // above leaves it behind - confirm whether cleanup on error is wanted.
      if (transformer.isTransform()) {
        dropTable(newTableName);
      }
      st.close();
      itsLogger.debug(LogUtils.exit(classLogInfo, "learn", model.toString()));
      model.setGroupByColumn(groupbyColumn);
      return model;

    } catch (Exception e) {
      itsLogger.error(e.getMessage(), e);
      throw new OperatorException(e.getLocalizedMessage());
    } finally {
      // Release the statement on failure paths too. Closing a statement also closes its current
      // result set, and closing an already-closed statement is a no-op.
      try {
        if (st != null) {
          st.close();
        }
      } catch (SQLException closeEx) {
        itsLogger.error(closeEx.getMessage(), closeEx);
      }
    }
  }
  /**
   * Produces the text-and-table visualization for a trained ARIMA model.
   *
   * <p>Returns {@code null} unless {@code analyzerOutPut} is an {@code AnalyzerOutPutTrainModel}
   * whose engine model is non-null and wraps an {@code ARIMAModel}. When the model has no group
   * column name, a single output is built from the first sub-model; otherwise one entity is built
   * per sub-model and they are combined into a multi output keyed by group column value. In both
   * cases the output is named after the analytic node.
   *
   * @param analyzerOutPut the analyzer result to visualize
   * @return the visualization output, or {@code null} when the input is not an ARIMA train result
   */
  @Override
  public VisualizationOutPut generateOutPut(AnalyticOutPut analyzerOutPut) {
    if (!(analyzerOutPut instanceof AnalyzerOutPutTrainModel)) {
      return null;
    }
    EngineModel engineModel =
        (EngineModel) ((AnalyzerOutPutTrainModel) analyzerOutPut).getEngineModel();
    if (engineModel == null) {
      return null;
    }
    if (!(engineModel.getModel() instanceof ARIMAModel)) {
      return null;
    }
    ARIMAModel arimaModel = (ARIMAModel) engineModel.getModel();
    List<SingleARIMAModel> singleModels = arimaModel.getModels();

    // No group-by column: a single text/table pair built from the first sub-model.
    if (StringUtil.isEmpty(arimaModel.getGroupColumnName())) {
      VisualizationOutPut singleOutput =
          new DataTextAndTableListVisualizationOutPut(
              buildArimaTextAndTableEntity(singleModels.get(0)));
      singleOutput.setName(analyzerOutPut.getAnalyticNode().getName());
      return singleOutput;
    }

    // Grouped: one entity and one output per sub-model, indexed by group column value.
    List<TextAndTableListEntity> entities = new ArrayList<TextAndTableListEntity>();
    List<DataTextAndTableListVisualizationOutPut> perGroupOutputs =
        new ArrayList<DataTextAndTableListVisualizationOutPut>();
    Map<String, TextAndTableListEntity> entityByGroupValue =
        new HashMap<String, TextAndTableListEntity>();
    String[] groupValues = new String[singleModels.size()];

    int position = 0;
    for (SingleARIMAModel singleModel : singleModels) {
      TextAndTableListEntity entity = buildArimaTextAndTableEntity(singleModel);
      entities.add(entity);
      perGroupOutputs.add(new DataTextAndTableListVisualizationOutPut(entity));

      String groupValue = singleModel.getGroupColumnValue();
      groupValues[position] = groupValue;
      entityByGroupValue.put(groupValue, entity);
      position++;
    }

    MultiTextAndTableListEntity combined = new MultiTextAndTableListEntity();
    combined.setTextAndTableListEntityList(entities);
    combined.setAvaiableValue(groupValues);
    combined.setNameEntityMap(entityByGroupValue);

    MultiDataTextAndTableListVisualizationOutPut multiOutput =
        new MultiDataTextAndTableListVisualizationOutPut(combined);
    multiOutput.setName(analyzerOutPut.getAnalyticNode().getName());
    multiOutput.setTextAndTableListOutput(perGroupOutputs);
    return multiOutput;
  }

  /** Wraps one sub-model's text table and text into a {@code TextAndTableListEntity}. */
  private TextAndTableListEntity buildArimaTextAndTableEntity(SingleARIMAModel singleModel) {
    TextTable textTable = getVTextTable(singleModel);
    TableEntity tableEntity = new TableEntity();
    generateTableEntity(textTable, tableEntity);

    TextAndTableListEntity entity = new TextAndTableListEntity();
    entity.addTableEntity(tableEntity);
    entity.setText(getVTextText(singleModel));
    return entity;
  }
Пример #5
0
  /**
   * Builds and executes the SQL that creates the pivot output (a table or a view, per
   * {@code getOutputType()}) from the input table.
   *
   * <p>The generated select groups by {@code groupColumn} and emits, for every distinct value of
   * the pivot column {@code columnNames}, the aggregate {@code aggrType} over
   * {@code aggColumn} (or the literal {@code 1} when {@code aggColumn} is empty) restricted to
   * rows having that value. With {@code useArray} the per-value expressions are packed into one
   * array column named after the pivot column; otherwise each becomes its own column named
   * {@code <pivotColumn>_<value>}. For Netezza the select is wrapped in an outer query that
   * replaces nulls with 0.
   *
   * @param databaseConnection connection used to create and run the statement
   * @param dataSet data set providing the column metadata and distinct values
   * @param locale locale used for error messages
   * @throws AnalysisError if the pivot column is numeric, has too many distinct values (non-array
   *     mode only), has no values, or the database reports an invalid identifier
   * @throws OperatorException for any other SQL failure
   */
  private void performOperation(
      DatabaseConnection databaseConnection, DataSet dataSet, Locale locale)
      throws AnalysisError, OperatorException {
    String outputTableName = getQuotaedTableName(getOutputSchema(), getOutputTable());
    String inputTableName = getQuotaedTableName(getInputSchema(), getInputTable());

    Columns atts = dataSet.getColumns();
    String dbType = databaseConnection.getProperties().getName();
    IDataSourceInfo dataSourceInfo = DataSourceInfoFactory.createConnectionInfo(dbType);
    IMultiDBUtility multiDBUtility = MultiDBUtilityFactory.createConnectionInfo(dbType);
    ISqlGeneratorMultiDB sqlGenerator = SqlGeneratorMultiDBFactory.createConnectionInfo(dbType);

    dropIfExist(dataSet);

    DatabaseUtil.alterParallel(databaseConnection, getOutputType()); // for oracle

    StringBuilder sb_create = new StringBuilder("create ");
    StringBuilder insertTable = new StringBuilder();

    if (getOutputType().equalsIgnoreCase("table")) {
      sb_create.append(" table ");
    } else {
      sb_create.append(" view ");
    }
    sb_create.append(outputTableName);
    sb_create.append(
        getOutputType().equalsIgnoreCase(Resources.TableType) ? getAppendOnlyString() : "");
    sb_create.append(DatabaseUtil.addParallel(databaseConnection, getOutputType())).append(" as (");
    StringBuilder selectSql = new StringBuilder(" select ");

    selectSql.append(StringHandler.doubleQ(groupColumn)).append(",");

    Column att = atts.get(columnNames);
    dataSet.computeColumnStatistics(att);
    if (att.isNumerical()) {
      logger.error("PivotTableAnalyzer cannot accept numeric type column");
      throw new AnalysisError(
          this,
          AnalysisErrorName.Not_numeric,
          locale,
          SDKLanguagePack.getMessage(SDKLanguagePack.PIVOT_NAME, locale));
    }
    String attName = StringHandler.doubleQ(att.getName());
    List<String> valueList = att.getMapping().getValues();
    if (!useArray
        && valueList.size() > Integer.parseInt(AlpineMinerConfig.PIVOT_DISTINCTVALUE_THRESHOLD)) {
      logger.error("Too many distinct value for column " + StringHandler.doubleQ(columnNames));
      throw new AnalysisError(
          this,
          AnalysisErrorName.Too_Many_Distinct_value,
          locale,
          StringHandler.doubleQ(columnNames),
          AlpineMinerConfig.PIVOT_DISTINCTVALUE_THRESHOLD);
    }

    if (valueList.size() <= 0) {
      logger.error("Empty table");
      throw new AnalysisError(this, AnalysisErrorName.Empty_table, locale);
    }

    String aggColumnName;
    if (!StringUtil.isEmpty(aggColumn)) {
      aggColumnName = StringHandler.doubleQ(aggColumn);
    } else {
      aggColumnName = "1";
    }

    // The dialect checks below go through the data set's own connection (as the original code
    // did), which is not necessarily the same object as databaseConnection.
    String dataSetDbType =
        ((DBTable) dataSet.getDBTable()).getDatabaseConnection().getProperties().getName();
    boolean isNetezza = dataSetDbType.equals(DataSourceInfoNZ.dBType);

    if (useArray) {
      if (dataSourceInfo.getDBType().equals(DataSourceInfoOracle.dBType)) {
        ArrayList<String> array = new ArrayList<String>();
        for (String rawValue : valueList) {
          String value = StringHandler.escQ(rawValue);
          String newValue =
              "alpine_miner_null_to_0("
                  + aggrType
                  + " (case when "
                  + attName
                  + "="
                  + CommonUtility.quoteValue(dbType, att, value)
                  + " then "
                  + aggColumnName
                  + " end )) ";
          array.add(newValue);
        }
        selectSql.append(
            CommonUtility.array2OracleArray(array, CommonUtility.OracleDataType.Float));
      } else {
        selectSql.append(multiDBUtility.floatArrayHead());
        for (String rawValue : valueList) {
          String value = StringHandler.escQ(rawValue);
          selectSql.append("alpine_miner_null_to_0(").append(aggrType);
          selectSql.append(" (case when ").append(attName).append("=");
          selectSql
              .append(CommonUtility.quoteValue(dbType, att, value))
              .append(" then ")
              .append(aggColumnName)
              .append(" end )) "); // else 0
          selectSql.append(",");
        }
        // valueList is non-empty here, so there is always a trailing comma to drop.
        selectSql = selectSql.deleteCharAt(selectSql.length() - 1);
        selectSql.append(multiDBUtility.floatArrayTail());
      }
      selectSql.append(" ").append(attName);
    } else {
      // The three dialects emit the same expression and differ only in the wrapper around the
      // CASE: Netezza has no null_to_0 wrapper (handled by the outer query below), DB2 casts the
      // CASE to double.
      String aggregateOpen;
      String caseOpen;
      String caseClose;
      if (isNetezza) {
        aggregateOpen = "(";
        caseOpen = " (case when ";
        caseClose = " end )) ";
      } else if (dataSetDbType.equals(DataSourceInfoDB2.dBType)) {
        aggregateOpen = "alpine_miner_null_to_0(";
        caseOpen = " (double(case when ";
        caseClose = " end ))) ";
      } else {
        aggregateOpen = "alpine_miner_null_to_0(";
        caseOpen = " (case when ";
        caseClose = " end )) ";
      }
      for (String rawValue : valueList) {
        String value = StringHandler.escQ(rawValue);
        selectSql.append(aggregateOpen).append(aggrType);
        selectSql.append(caseOpen).append(attName).append("=");
        selectSql
            .append(CommonUtility.quoteValue(dbType, att, value))
            .append(" then ")
            .append(aggColumnName)
            .append(caseClose); // else 0
        selectSql.append(StringHandler.doubleQ(att.getName() + "_" + value));
        selectSql.append(",");
      }
      selectSql = selectSql.deleteCharAt(selectSql.length() - 1);
    }
    selectSql.append(" from ").append(inputTableName).append(" foo group by ");
    selectSql.append(StringHandler.doubleQ(groupColumn));

    if (isNetezza) {
      // Outer query replacing nulls with 0 per pivot column.
      // NOTE(review): this references <pivotColumn>_<value> columns, which the useArray branch
      // above does not produce - confirm useArray is never combined with Netezza.
      StringBuilder sb = new StringBuilder();
      sb.append("select ").append(StringHandler.doubleQ(groupColumn)).append(",");
      for (String rawValue : valueList) {
        // Must be built exactly like the inner alias (escQ-escaped value); using the raw value
        // here referenced a different identifier whenever escQ altered the value.
        String colName =
            StringHandler.doubleQ(att.getName() + "_" + StringHandler.escQ(rawValue));
        sb.append("case when ").append(colName).append(" is null then 0 else ");
        sb.append(colName).append(" end ").append(colName).append(",");
      }
      sb = sb.deleteCharAt(sb.length() - 1);
      sb.append(" from (").append(selectSql).append(") foo ");
      selectSql = sb;
    }
    sb_create.append(selectSql).append(" )");

    if (getOutputType().equalsIgnoreCase("table")) {
      sb_create.append(getEndingString());
      insertTable.append(sqlGenerator.insertTable(selectSql.toString(), outputTableName));
    }

    Statement st = null;
    try {
      st = databaseConnection.createStatement(false);
      logger.debug("PivotTableAnalyzer.performOperation():sql=" + sb_create);
      st.execute(sb_create.toString());

      if (insertTable.length() > 0) {
        // Log before executing so the statement is visible even when it fails.
        logger.debug("PivotTableAnalyzer.performOperation():insertTableSql=" + insertTable);
        st.execute(insertTable.toString());
      }
    } catch (SQLException e) {
      logger.error(e);
      // getMessage() may be null; only a recognised prefix maps to Invalid_Identifier.
      String message = e.getMessage();
      if (message != null
          && (message.startsWith("ORA-03001")
              || message.startsWith("ERROR:  invalid identifier"))) {
        throw new AnalysisError(this, AnalysisErrorName.Invalid_Identifier, locale);
      } else {
        throw new OperatorException(e.getLocalizedMessage());
      }
    } finally {
      // The statement was previously never closed.
      if (st != null) {
        try {
          st.close();
        } catch (SQLException closeEx) {
          logger.debug(
              "PivotTableAnalyzer.performOperation():failed to close statement: "
                  + closeEx.getMessage());
        }
      }
    }
  }