Esempio n. 1
0
 /**
  * Shows histogram for selected binned attributes.
  *
  * <p>For each attribute one query is run against the discretized view: rows are grouped by the
  * attribute's value and printed as "value frequency" pairs, ordered by descending frequency and
  * then ascending bin value. A failure while querying one attribute is printed to standard
  * output and the loop continues with the next attribute.
  *
  * @param binningType type of discretization performed i.e., quantile, equal width or custom
  * @param xformResult name of the result discretized view
  * @param attributes names of attributes for which histogram is displayed
  */
 public static void displayDiscretizationResults(
     OraNumericalBinningType binningType, String xformResult, String[] attributes) {
   System.out.println("\nShowing results of the discretization transformation");
   System.out.println("\tType of discretization: " + binningType.name());
   java.sql.Connection dbConn = ((OraConnection) m_dmeConn).getDatabaseConnection();
   for (int i = 0; i < attributes.length; i++) {
     // The attribute name is wrapped in double quotes and spliced into the SQL text together
     // with the view name; identifiers cannot be bound as JDBC parameters, so both values must
     // come from trusted code.
     String sqlQuery =
         MessageFormat.format(
             "SELECT {0} BIN_NUMBER, COUNT(*) FREQUENCY FROM ({1}) GROUP BY {0} "
                 + "ORDER BY FREQUENCY DESC,BIN_NUMBER ASC",
             new String[] {"\"" + attributes[i] + "\"", xformResult});
     Statement stmt = null;
     ResultSet rs = null;
     System.out.println("\tHistogram for:" + attributes[i]);
     try {
       stmt = dbConn.createStatement();
       rs = stmt.executeQuery(sqlQuery);
       while (rs.next()) {
         String binValue = rs.getString("BIN_NUMBER");
         int freq = rs.getInt("FREQUENCY");
         System.out.println("\t\t" + binValue + " " + freq);
       }
     } catch (Exception e) {
       // Best-effort display: report the problem and move on to the next attribute.
       System.out.println(e);
     } finally {
       // Close each resource on its own. When executeQuery fails rs is still null, and a
       // failure while closing rs must not prevent stmt from being closed.
       if (rs != null) {
         try {
           rs.close();
         } catch (Exception ignored) {
           // Nothing useful can be done if closing the result set fails.
         }
       }
       if (stmt != null) {
         try {
           stmt.close();
         } catch (Exception ignored) {
           // Nothing useful can be done if closing the statement fails.
         }
       }
     }
   }
 }
Esempio n. 2
0
  /**
   * Illustrates how to perform data discretization.
   *
   * <p>Configures a binning transformation over {@code MINING_DATA_BUILD_V} (10 numeric bins,
   * 8 categorical bins with top-N categorical binning, {@code CUST_ID} and {@code CUST_GENDER}
   * excluded), saves it as a one-element transformation sequence named
   * {@code bin_<binningType>_xfSeq}, executes it as the task {@code xFormBin_jdm}, and finally
   * prints histograms for {@code CUST_INCOME_LEVEL} and {@code OCCUPATION} from the result.
   *
   * @param resultXformName name of the result discretized view
   * @param binningType type of discretization to perform i.e., quantile, equal width or custom
   * @throws JDMException if discretization failed
   */
  public static void binData(String resultXformName, OraNumericalBinningType binningType)
      throws JDMException {
    // Schema where the original data and resulting transformations reside.
    // Upper-case with a fixed locale: the default-locale variant would map 'i' to a dotted
    // capital under e.g. a Turkish default locale and produce a wrong schema name.
    String schema =
        (m_dmeConn.getConnectionSpec().getName()).toUpperCase(java.util.Locale.ENGLISH);

    // Fully qualified names of the input data set and of the transformation result
    String inputData = schema + "." + "MINING_DATA_BUILD_V";
    String outputData = schema + "." + resultXformName;

    // Create discretization transformation instance
    OraBinningTransform obt = m_xformFactory.createBinningTransform();
    obt.setTransformInputData(inputData);
    obt.setTransformOutputData(outputData);

    // Specify the number of numeric bins
    obt.setNumberOfBinsForNumerical(10);

    // Specify the number of categoric bins
    obt.setNumberOfBinsForCategorical(8);

    // Specify the list of excluded attributes
    String[] excludedList = new String[] {"CUST_ID", "CUST_GENDER"};
    obt.setExcludeColumnList(excludedList);

    // Specify the type of numeric binning: equal-width or quantile
    // ( default is quantile )
    obt.setNumericalBinningType(binningType);
    // Specify the type of categorical binning as Top-N: by default it is none
    obt.setCategoricalBinningType(OraCategoricalBinningType.top_n);

    ArrayList xformList = new ArrayList();
    xformList.add(obt);
    // Create a transformation sequence object
    OraTransformationSequence xformSeq =
        m_xformFactory.createTransformationSequence(
            inputData, // name of the input data set
            xformList, // List of transformations. In this case only one transformation,
            // i.e., the binning transformation configured above
            outputData // name of the transformation result
            );
    String xformSeqName = "bin_" + binningType.name() + "_xfSeq";
    m_dmeConn.saveObject(xformSeqName, xformSeq, true);

    OraTransformationTask xformTask = m_xformTaskFactory.create(xformSeqName, false);

    executeTask(xformTask, "xFormBin_jdm");
    displayDiscretizationResults(
        binningType, outputData, new String[] {"CUST_INCOME_LEVEL", "OCCUPATION"});
  }