/** * Illustrates how to perform data normalization * * @param normalizeType type of normalization to perform * @param xformResult name of the result transformation view * @throws JDMException if transformation failed */ public static void normalizeData(OraNormalizeType normalizeType, String xformResult) throws JDMException { // Schema where the original data and resulting transformations reside String schema = (m_dmeConn.getConnectionSpec().getName()).toUpperCase(); OraNormalizeTransform ont = m_xformFactory.createNormalizeTransform(normalizeType, new Integer(6)); // Specify the list of excluded attributes String[] excludedList = new String[] {"CUST_ID", "CUST_GENDER"}; ont.setExcludeColumnList(excludedList); ArrayList xformList = new ArrayList(); xformList.add(ont); // Create a transformation sequence object OraTransformationSequence xformSeq = m_xformFactory.createTransformationSequence( schema + "." + "MINING_DATA_BUILD_V", // name of the input data set xformList, // List of transformations. In this case only one type of transformation // i.e., supervised binning schema + "." + xformResult // name of the transformation result ); String xformSeqName = "nmz_" + normalizeType.name() + "_xfSeq"; m_dmeConn.saveObject(xformSeqName, xformSeq, true); OraTransformationTask xformTask = m_xformTaskFactory.create(xformSeqName, false); executeTask(xformTask, "xformNormalize_jdm"); displayNormalizeResults(schema, "AGE", normalizeType, xformResult); }
/** * Illustrates how to perform data discretization. * * @param resultXformName name of the result discretized view * @param binningType type of discretization to perform i.e., quantile, equal width or custom * @throws JDMException if discretization failed */ public static void binData(String resultXformName, OraNumericalBinningType binningType) throws JDMException { // Schema where the original data and resulting transformations reside String schema = (m_dmeConn.getConnectionSpec().getName()).toUpperCase(); // Create discretization transformation instance OraBinningTransform obt = m_xformFactory.createBinningTransform(); obt.setTransformInputData(schema + "." + "MINING_DATA_BUILD_V"); obt.setTransformOutputData(schema + "." + resultXformName); // Specify the number of numeric bins obt.setNumberOfBinsForNumerical(10); // Specify the number of categoric bins obt.setNumberOfBinsForCategorical(8); // Specify the list of excluded attributes String[] excludedList = new String[] {"CUST_ID", "CUST_GENDER"}; obt.setExcludeColumnList(excludedList); // Specify the type of numeric binning: equal-width or quantile // ( default is quantile ) obt.setNumericalBinningType(binningType); // Specify the type of categorical binning as Top-N: by default it is none obt.setCategoricalBinningType(OraCategoricalBinningType.top_n); ArrayList xformList = new ArrayList(); xformList.add(obt); // Create a transformation sequence object OraTransformationSequence xformSeq = m_xformFactory.createTransformationSequence( schema + "." + "MINING_DATA_BUILD_V", // name of the input data set xformList, // List of transformations. In this case only one type of transformation // i.e., supervised binning schema + "." + resultXformName // name of the transformation result ); String xformSeqName = "bin_" + binningType.name() + "_xfSeq"; m_dmeConn.saveObject(xformSeqName, xformSeq, true); OraTransformationTask xformTask = m_xformTaskFactory.create(xformSeqName, false); executeTask(xformTask, "xFormBin_jdm"); displayDiscretizationResults( binningType, schema + "." + resultXformName, new String[] {"CUST_INCOME_LEVEL", "OCCUPATION"}); }
/** * Illustrates how to perform data clipping * * @param clippingType type of clipping to perform * @param xformResult name of the result transformation view * @throws JDMException if transformation failed */ public static void clipData(OraClippingType clippingType, String xformResult) throws JDMException { // Schema where the original data and resulting transformations reside String schema = (m_dmeConn.getConnectionSpec().getName()).toUpperCase(); OraClippingTransform oct = m_xformFactory.createClippingTransform(); oct.setTransformInputData(schema + "." + "MINING_DATA_BUILD_V"); oct.setTransformOutputData(schema + "." + xformResult); // Specify the list of excluded attributes String[] excludedList = new String[] {"CUST_ID", "CUST_GENDER"}; oct.setExcludeColumnList(excludedList); // Specify the type of clipping: trim of winsorize ( default is trimming). oct.setClippingType(clippingType); // Specify the tail fraction as 3% of values on both ends oct.setTailFraction(0.03); ArrayList xformList = new ArrayList(); xformList.add(oct); // Create a transformation sequence object OraTransformationSequence xformSeq = m_xformFactory.createTransformationSequence( schema + "." + "MINING_DATA_BUILD_V", // name of the input data set xformList, // List of transformations. In this case only one type of transformation // i.e., supervised binning schema + "." + xformResult // name of the transformation result ); String xformSeqName = "clp_" + clippingType.name() + "_xfSeq"; m_dmeConn.saveObject(xformSeqName, xformSeq, true); OraTransformationTask xformTask = m_xformTaskFactory.create(xformSeqName, false); executeTask(xformTask, "xfromClip_jdm"); displayClippingResults(schema, xformResult, "AGE", clippingType); }
/** * For supervised functions with the known target attribute, supervised binning is a recommended * approach. It is a smart binning based on the target attribute values. This method is supported * from 11.1 release of ODM. */ public static void binSupervised() throws JDMException { // Schema where the original data and resulting transformations reside String schema = (m_dmeConn.getConnectionSpec().getName()).toUpperCase(); // Create discretization transformation instance OraBinningTransform obt = m_xformFactory.createBinningTransform(); // Specify the list of excluded attributes String[] excludedList = new String[] {"CUST_ID", "CUST_GENDER"}; obt.setExcludeColumnList(excludedList); // Specify the type of numeric binning: supervised obt.setNumericalBinningType(OraNumericalBinningType.supervised); // Specify the type of categorical binning as supervised obt.setCategoricalBinningType(OraCategoricalBinningType.supervised); obt.setTargetAttributeName("AFFINITY_CARD"); ArrayList xformList = new ArrayList(); xformList.add(obt); // Create a transformation sequence object OraTransformationSequence xformSeq = m_xformFactory.createTransformationSequence( schema + "." + "MINING_DATA_BUILD_V", // name of the input data set xformList, // List of transformations. In this case only one type of transformation // i.e., supervised binning schema + "." + "MINING_DATA_BINNED_SUP" // name of the transformation result ); m_dmeConn.saveObject("superBin_xformSeq", xformSeq, true); OraTransformationTask xformTask = m_xformTaskFactory.create("superBin_xformSeq", false); executeTask(xformTask, "xFormSuperBin_jdm"); displayDiscretizationResults( OraNumericalBinningType.supervised, schema + "." + "MINING_DATA_BINNED_SUP", new String[] {"CUST_INCOME_LEVEL", "OCCUPATION"}); }
/** * Illustrates how to perform custom data discretization. First discretization of 2 numerical * attributes is performed. "AGE" is binned with equal width method with 10 bins and * "YRS_RESIDENCE" with quantile method and 5 bins. Categorical attributes "EDUCATION" and * "OCCUPATION" are discretized with the Top-N method into 15 and 10 bins. This method illustrates * how additional attributes can be added to the existing discretization tables: "AFFINITY_CARD" * and ""HOUSEHOLD_SIZE". Finally results are combined into a single array and custom * transformation task is performed. * * @param resultXformName name of the result transformation view * @throws JDMException if transformation failed */ public static void binDataCustom(String resultXformName) throws JDMException { System.out.println("Custom binning"); System.out.println("--------------------------------------------"); // Schema where the original data and resulting transformations reside String schema = (m_dmeConn.getConnectionSpec().getName()).toUpperCase(); // Numeric custom binning OraNumericalAttributeBins[] customNumBins = m_binXformFactory.computeNumericBins( schema + "." + "MINING_DATA_BUILD_V", // name of the input data set new String[] { "AGE", "YRS_RESIDENCE", }, new OraNumericalBinningType[] { OraNumericalBinningType.equi_width, OraNumericalBinningType.quantile }, new Integer[] {new Integer(10), new Integer(5)}); if (customNumBins == null) { System.out.println("Error: no numeric bins were computed"); return; } // Categoric custom binning OraCategoricalAttributeBins[] customCatBins = m_binXformFactory.computeCategoricBins( schema + "." + "MINING_DATA_BUILD_V", // name of the input data set new String[] { "EDUCATION", "OCCUPATION", }, new OraCategoricalBinningType[] { OraCategoricalBinningType.top_n, OraCategoricalBinningType.top_n, }, new Integer[] { new Integer(15), new Integer(10), }); if (customCatBins == null) { System.out.println("Error: no categoric bins were computed"); return; } // combine custom bins into the single array OraAttributeBins[] customBins = null; customBins = incrementArray(customBins, (OraAttributeBins[]) customNumBins); customBins = incrementArray(customBins, (OraAttributeBins[]) customCatBins); // show resulting array of custom bins for (int i = 0; i < customBins.length; i++) { System.out.println("Attribute:" + customBins[i].getAttributeName()); if (customBins[i] instanceof OraNumericalAttributeBins) { OraNumericalAttributeBins oraNumBin = (OraNumericalAttributeBins) customBins[i]; OraNumericalBin[] bs = oraNumBin.getBins(); System.out.println("\tBin ID\tLower\tUpper"); for (int j = 0; j < bs.length; j++) { System.out.println( "\t" + bs[j].getBinID() + "\t" + m_df.format(bs[j].getStartValue()) + "\t" + m_df.format(bs[j].getEndValue())); } } else if (customBins[i] instanceof OraCategoricalAttributeBins) { OraCategoricalAttributeBins oraCatBin = (OraCategoricalAttributeBins) customBins[i]; OraCategoricalBin[] bs = oraCatBin.getBins(); System.out.println("\tBin ID\tCategory"); for (int j = 0; j < bs.length; j++) { Object[] categories = bs[j].getCategories(); System.out.print("\t" + bs[j].getBinID() + "\t"); for (int k = 0; k < categories.length; k++) { System.out.print(categories[k].toString()); if (k < categories.length - 1) System.out.print(";"); } System.out.println(); } } } // Create discretization transformation instance OraBinningTransform obt = m_xformFactory.createBinningTransform(customBins); // Specify the type of numeric binning: custom obt.setNumericalBinningType(OraNumericalBinningType.custom); // Specify the type of categoric binning: custom obt.setCategoricalBinningType(OraCategoricalBinningType.custom); ArrayList xformList = new ArrayList(); xformList.add(obt); // Create a transformation sequence object OraTransformationSequence xformSeq = m_xformFactory.createTransformationSequence( schema + "." + "MINING_DATA_BUILD_V", // name of the input data set xformList, // List of transformations. In this case only one type of transformation // i.e., supervised binning schema + "." + resultXformName // name of the transformation result ); String xformSeqName = "bin_" + OraNumericalBinningType.custom.name() + "_xfSeq"; m_dmeConn.saveObject(xformSeqName, xformSeq, true); OraTransformationTask xformTask = m_xformTaskFactory.create(xformSeqName, true); executeTask(xformTask, "xCustomBin_jdm"); // display content of the bin definition tables showBinDefinitionTableContents(obt.getCategoricalBinTable(), "categorical"); showBinDefinitionTableContents(obt.getNumericalBinTable(), "numerical"); displayDiscretizationResults( OraNumericalBinningType.custom, schema + "." + resultXformName, new String[] {"AGE", "CUST_INCOME_LEVEL", "EDUCATION", "OCCUPATION"}); // Bin additional attribute and add // to the existing bin definition tables // ---------------------------------------------------------------- // Numeric custom binning OraNumericalAttributeBins[] customNumBinsAdd = m_binXformFactory.computeNumericBins( schema + "." + "MINING_DATA_BUILD_V", // name of the input data set new String[] { "AFFINITY_CARD", }, new OraNumericalBinningType[] { OraNumericalBinningType.quantile, }, new Integer[] {new Integer(6)}); if (customNumBins == null) { System.out.println("Error: no numeric bins were computed"); return; } // Categoric custom binning OraCategoricalAttributeBins[] customCatBinsAdd = m_binXformFactory.computeCategoricBins( schema + "." + "MINING_DATA_BUILD_V", // name of the input data set new String[] { "HOUSEHOLD_SIZE", }, new OraCategoricalBinningType[] { OraCategoricalBinningType.top_n, }, new Integer[] { new Integer(15), new Integer(10), }); if (customCatBins == null) { System.out.println("Error: no categoric bins were computed"); return; } // combine custom bins into the single array OraAttributeBins[] customBinsAdd = null; customBinsAdd = incrementArray(customBinsAdd, (OraAttributeBins[]) customNumBinsAdd); customBinsAdd = incrementArray(customBinsAdd, (OraAttributeBins[]) customCatBinsAdd); // clean up previous view dropView(null, resultXformName); OraBinningTransform obtAdd = m_xformFactory.createBinningTransform( obt.getCategoricalBinTable(), obt.getNumericalBinTable(), customBinsAdd); obtAdd.setTransformInputData(schema + "." + "MINING_DATA_BUILD_V"); obtAdd.setTransformOutputData(schema + "." + resultXformName); // Specify the type of numeric binning: custom obtAdd.setNumericalBinningType(OraNumericalBinningType.custom); // Specify the type of categoric binning: custom obtAdd.setCategoricalBinningType(OraCategoricalBinningType.custom); OraTransformationTask xformTaskAdd = m_xformTaskFactory.create(obtAdd); executeTask(xformTaskAdd, "xCustomBinAdd_jdm"); // display content of the new bin definition tables showBinDefinitionTableContents(obtAdd.getCategoricalBinTable(), "categorical"); showBinDefinitionTableContents(obtAdd.getNumericalBinTable(), "numerical"); // show results displayDiscretizationResults( OraNumericalBinningType.custom, schema + "." + resultXformName, new String[] {"AFFINITY_CARD", "HOUSEHOLD_SIZE"}); }