/**
 * Shows a histogram for selected binned attributes.
 *
 * <p>For every attribute a {@code GROUP BY} query is run against {@code xformResult} and each
 * distinct bin value is printed with its row count, ordered by descending frequency and then by
 * ascending bin value. A failure while querying one attribute is printed to standard output and
 * does not prevent the remaining attributes from being processed.
 *
 * @param binningType type of discretization performed i.e., quantile, equal width or custom
 * @param xformResult name of the result discretized view
 * @param attributes names of attributes for which histogram is displayed
 */
public static void displayDiscretizationResults(
    OraNumericalBinningType binningType, String xformResult, String[] attributes) {
  System.out.println("\nShowing results of the discretization transformation");
  System.out.println("\tType of discretization: " + binningType.name());
  java.sql.Connection dbConn = ((OraConnection) m_dmeConn).getDatabaseConnection();
  for (int i = 0; i < attributes.length; i++) {
    // Column and view names are identifiers, so they are spliced into the statement text;
    // the attribute is wrapped in double quotes to be treated as a quoted identifier.
    String sqlQuery =
        MessageFormat.format(
            "SELECT {0} BIN_NUMBER, COUNT(*) FREQUENCY FROM ({1}) GROUP BY {0} "
                + "ORDER BY FREQUENCY DESC,BIN_NUMBER ASC",
            new Object[] {"\"" + attributes[i] + "\"", xformResult});
    Statement stmt = null;
    ResultSet rs = null;
    System.out.println("\tHistogram for:" + attributes[i]);
    try {
      stmt = dbConn.createStatement();
      rs = stmt.executeQuery(sqlQuery);
      while (rs.next()) {
        String binValue = rs.getString("BIN_NUMBER");
        int freq = rs.getInt("FREQUENCY");
        System.out.println("\t\t" + binValue + " " + freq);
      }
    } catch (Exception e) {
      System.out.println(e);
    } finally {
      // Close each resource on its own: either may still be null if an earlier step failed,
      // and a failure closing the result set must not leave the statement open.
      if (rs != null) {
        try {
          rs.close();
        } catch (Exception e) {
          System.out.println(e);
        }
      }
      if (stmt != null) {
        try {
          stmt.close();
        } catch (Exception e) {
          System.out.println(e);
        }
      }
    }
  }
}
/** * Illustrates how to perform data discretization. * * @param resultXformName name of the result discretized view * @param binningType type of discretization to perform i.e., quantile, equal width or custom * @throws JDMException if discretization failed */ public static void binData(String resultXformName, OraNumericalBinningType binningType) throws JDMException { // Schema where the original data and resulting transformations reside String schema = (m_dmeConn.getConnectionSpec().getName()).toUpperCase(); // Create discretization transformation instance OraBinningTransform obt = m_xformFactory.createBinningTransform(); obt.setTransformInputData(schema + "." + "MINING_DATA_BUILD_V"); obt.setTransformOutputData(schema + "." + resultXformName); // Specify the number of numeric bins obt.setNumberOfBinsForNumerical(10); // Specify the number of categoric bins obt.setNumberOfBinsForCategorical(8); // Specify the list of excluded attributes String[] excludedList = new String[] {"CUST_ID", "CUST_GENDER"}; obt.setExcludeColumnList(excludedList); // Specify the type of numeric binning: equal-width or quantile // ( default is quantile ) obt.setNumericalBinningType(binningType); // Specify the type of categorical binning as Top-N: by default it is none obt.setCategoricalBinningType(OraCategoricalBinningType.top_n); ArrayList xformList = new ArrayList(); xformList.add(obt); // Create a transformation sequence object OraTransformationSequence xformSeq = m_xformFactory.createTransformationSequence( schema + "." + "MINING_DATA_BUILD_V", // name of the input data set xformList, // List of transformations. In this case only one type of transformation // i.e., supervised binning schema + "." 
+ resultXformName // name of the transformation result ); String xformSeqName = "bin_" + binningType.name() + "_xfSeq"; m_dmeConn.saveObject(xformSeqName, xformSeq, true); OraTransformationTask xformTask = m_xformTaskFactory.create(xformSeqName, false); executeTask(xformTask, "xFormBin_jdm"); displayDiscretizationResults( binningType, schema + "." + resultXformName, new String[] {"CUST_INCOME_LEVEL", "OCCUPATION"}); }