예제 #1
0
  /**
   * Illustrates how to perform data normalization.
   *
   * <p>Creates a normalization transform that excludes the {@code CUST_ID} and {@code CUST_GENDER}
   * columns, wraps it in a one-step transformation sequence that reads {@code MINING_DATA_BUILD_V}
   * and writes {@code xformResult}, saves the sequence as {@code nmz_<normalizeType>_xfSeq},
   * executes it as a task and passes the {@code AGE} attribute to {@code displayNormalizeResults}.
   *
   * @param normalizeType type of normalization to perform
   * @param xformResult name of the result transformation view
   * @throws JDMException if transformation failed
   */
  public static void normalizeData(OraNormalizeType normalizeType, String xformResult)
      throws JDMException {
    // Schema where the original data and resulting transformations reside.
    // Locale.ROOT keeps the upper-casing independent of the JVM default locale
    // (e.g. a Turkish locale would otherwise map 'i' to a dotted capital I).
    String schema =
        m_dmeConn.getConnectionSpec().getName().toUpperCase(java.util.Locale.ROOT);

    // Integer.valueOf replaces the deprecated Integer(int) constructor.
    // NOTE(review): the second argument is presumably the rounding precision - confirm in the
    // transformation factory documentation.
    OraNormalizeTransform ont =
        m_xformFactory.createNormalizeTransform(normalizeType, Integer.valueOf(6));

    // Specify the list of excluded attributes
    String[] excludedList = new String[] {"CUST_ID", "CUST_GENDER"};
    ont.setExcludeColumnList(excludedList);

    // Raw list kept on purpose: the element type expected by the factory is not visible here.
    ArrayList xformList = new ArrayList();
    xformList.add(ont);
    // Create a transformation sequence object
    OraTransformationSequence xformSeq =
        m_xformFactory.createTransformationSequence(
            schema + "." + "MINING_DATA_BUILD_V", // name of the input data set
            xformList, // list of transformations; here a single normalization transform
            schema + "." + xformResult // name of the transformation result
            );
    String xformSeqName = "nmz_" + normalizeType.name() + "_xfSeq";
    m_dmeConn.saveObject(xformSeqName, xformSeq, true);

    OraTransformationTask xformTask = m_xformTaskFactory.create(xformSeqName, false);
    executeTask(xformTask, "xformNormalize_jdm");
    displayNormalizeResults(schema, "AGE", normalizeType, xformResult);
  }
예제 #2
0
  /**
   * Illustrates how to perform data discretization.
   *
   * <p>Configures a binning transform over {@code MINING_DATA_BUILD_V} with 10 numerical bins, 8
   * categorical bins (Top-N) and the requested numerical binning type, excluding the
   * {@code CUST_ID} and {@code CUST_GENDER} columns. The transform is wrapped in a one-step
   * transformation sequence saved as {@code bin_<binningType>_xfSeq}, executed as a task, and the
   * {@code CUST_INCOME_LEVEL} and {@code OCCUPATION} attributes are passed to
   * {@code displayDiscretizationResults}.
   *
   * @param resultXformName name of the result discretized view
   * @param binningType type of discretization to perform i.e., quantile, equal width or custom
   * @throws JDMException if discretization failed
   */
  public static void binData(String resultXformName, OraNumericalBinningType binningType)
      throws JDMException {
    // Schema where the original data and resulting transformations reside.
    // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
    String schema =
        m_dmeConn.getConnectionSpec().getName().toUpperCase(java.util.Locale.ROOT);
    // Fully qualified names of the input data set and of the resulting view
    String inputData = schema + "." + "MINING_DATA_BUILD_V";
    String outputData = schema + "." + resultXformName;

    // Create discretization transformation instance
    OraBinningTransform obt = m_xformFactory.createBinningTransform();
    obt.setTransformInputData(inputData);
    obt.setTransformOutputData(outputData);

    // Specify the number of numeric bins
    obt.setNumberOfBinsForNumerical(10);

    // Specify the number of categoric bins
    obt.setNumberOfBinsForCategorical(8);

    // Specify the list of excluded attributes
    String[] excludedList = new String[] {"CUST_ID", "CUST_GENDER"};
    obt.setExcludeColumnList(excludedList);

    // Specify the type of numeric binning: equal-width or quantile
    // ( default is quantile )
    obt.setNumericalBinningType(binningType);
    // Specify the type of categorical binning as Top-N: by default it is none
    obt.setCategoricalBinningType(OraCategoricalBinningType.top_n);

    // Raw list kept on purpose: the element type expected by the factory is not visible here.
    ArrayList xformList = new ArrayList();
    xformList.add(obt);
    // Create a transformation sequence object
    OraTransformationSequence xformSeq =
        m_xformFactory.createTransformationSequence(
            inputData, // name of the input data set
            xformList, // list of transformations; here a single binning transform
            outputData // name of the transformation result
            );
    String xformSeqName = "bin_" + binningType.name() + "_xfSeq";
    m_dmeConn.saveObject(xformSeqName, xformSeq, true);

    OraTransformationTask xformTask = m_xformTaskFactory.create(xformSeqName, false);

    executeTask(xformTask, "xFormBin_jdm");
    displayDiscretizationResults(
        binningType, outputData, new String[] {"CUST_INCOME_LEVEL", "OCCUPATION"});
  }
예제 #3
0
  /**
   * Illustrates how to perform data clipping.
   *
   * <p>Configures a clipping transform over {@code MINING_DATA_BUILD_V} with the requested
   * clipping type and a tail fraction of 0.03, excluding the {@code CUST_ID} and
   * {@code CUST_GENDER} columns. The transform is wrapped in a one-step transformation sequence
   * saved as {@code clp_<clippingType>_xfSeq}, executed as a task, and the {@code AGE} attribute
   * is passed to {@code displayClippingResults}.
   *
   * @param clippingType type of clipping to perform
   * @param xformResult name of the result transformation view
   * @throws JDMException if transformation failed
   */
  public static void clipData(OraClippingType clippingType, String xformResult)
      throws JDMException {
    // Schema where the original data and resulting transformations reside.
    // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
    String schema =
        m_dmeConn.getConnectionSpec().getName().toUpperCase(java.util.Locale.ROOT);
    // Fully qualified names of the input data set and of the resulting view
    String inputData = schema + "." + "MINING_DATA_BUILD_V";
    String outputData = schema + "." + xformResult;

    OraClippingTransform oct = m_xformFactory.createClippingTransform();
    oct.setTransformInputData(inputData);
    oct.setTransformOutputData(outputData);

    // Specify the list of excluded attributes
    String[] excludedList = new String[] {"CUST_ID", "CUST_GENDER"};
    oct.setExcludeColumnList(excludedList);

    // Specify the type of clipping: trim or winsorize ( default is trimming).
    oct.setClippingType(clippingType);

    // Specify the tail fraction as 3% of values on both ends
    oct.setTailFraction(0.03);

    // Raw list kept on purpose: the element type expected by the factory is not visible here.
    ArrayList xformList = new ArrayList();
    xformList.add(oct);
    // Create a transformation sequence object
    OraTransformationSequence xformSeq =
        m_xformFactory.createTransformationSequence(
            inputData, // name of the input data set
            xformList, // list of transformations; here a single clipping transform
            outputData // name of the transformation result
            );
    String xformSeqName = "clp_" + clippingType.name() + "_xfSeq";
    m_dmeConn.saveObject(xformSeqName, xformSeq, true);

    OraTransformationTask xformTask = m_xformTaskFactory.create(xformSeqName, false);
    // NOTE(review): "xfrom" looks like a typo for "xform"; the name is kept unchanged because it
    // is a runtime identifier that other code or stored objects may refer to.
    executeTask(xformTask, "xfromClip_jdm");

    displayClippingResults(schema, xformResult, "AGE", clippingType);
  }
예제 #4
0
  /**
   * For supervised functions with the known target attribute, supervised binning is a recommended
   * approach. It is a smart binning based on the target attribute values. This method is supported
   * from 11.1 release of ODM.
   *
   * <p>Bins {@code MINING_DATA_BUILD_V} into {@code MINING_DATA_BINNED_SUP} using
   * {@code AFFINITY_CARD} as the target attribute and excluding the {@code CUST_ID} and
   * {@code CUST_GENDER} columns. The transformation sequence is saved as
   * {@code superBin_xformSeq}, executed as a task, and the {@code CUST_INCOME_LEVEL} and
   * {@code OCCUPATION} attributes are passed to {@code displayDiscretizationResults}.
   *
   * @throws JDMException if the transformation failed
   */
  public static void binSupervised() throws JDMException {
    // Schema where the original data and resulting transformations reside.
    // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
    String schema =
        m_dmeConn.getConnectionSpec().getName().toUpperCase(java.util.Locale.ROOT);
    // Fully qualified name of the resulting view; used for both the sequence and the display
    String outputData = schema + "." + "MINING_DATA_BINNED_SUP";

    // Create discretization transformation instance
    OraBinningTransform obt = m_xformFactory.createBinningTransform();

    // Specify the list of excluded attributes
    String[] excludedList = new String[] {"CUST_ID", "CUST_GENDER"};
    obt.setExcludeColumnList(excludedList);

    // Specify the type of numeric binning: supervised
    obt.setNumericalBinningType(OraNumericalBinningType.supervised);
    // Specify the type of categorical binning as supervised
    obt.setCategoricalBinningType(OraCategoricalBinningType.supervised);
    obt.setTargetAttributeName("AFFINITY_CARD");

    // Raw list kept on purpose: the element type expected by the factory is not visible here.
    ArrayList xformList = new ArrayList();
    xformList.add(obt);
    // Create a transformation sequence object
    OraTransformationSequence xformSeq =
        m_xformFactory.createTransformationSequence(
            schema + "." + "MINING_DATA_BUILD_V", // name of the input data set
            xformList, // list of transformations; here a single supervised binning transform
            outputData // name of the transformation result
            );
    m_dmeConn.saveObject("superBin_xformSeq", xformSeq, true);

    OraTransformationTask xformTask = m_xformTaskFactory.create("superBin_xformSeq", false);
    executeTask(xformTask, "xFormSuperBin_jdm");
    displayDiscretizationResults(
        OraNumericalBinningType.supervised,
        outputData,
        new String[] {"CUST_INCOME_LEVEL", "OCCUPATION"});
  }
예제 #5
0
  /**
   * Illustrates how to perform custom data discretization. First discretization of 2 numerical
   * attributes is performed. "AGE" is binned with equal width method with 10 bins and
   * "YRS_RESIDENCE" with quantile method and 5 bins. Categorical attributes "EDUCATION" and
   * "OCCUPATION" are discretized with the Top-N method into 15 and 10 bins. This method illustrates
   * how additional attributes can be added to the existing discretization tables: "AFFINITY_CARD"
   * (quantile, 6 bins) and "HOUSEHOLD_SIZE" (Top-N, 15 bins). Finally results are combined into a
   * single array and custom transformation task is performed.
   *
   * <p>If any of the bin computations returns {@code null}, an error line is printed to standard
   * output and the method returns without running the remaining steps.
   *
   * @param resultXformName name of the result transformation view
   * @throws JDMException if transformation failed
   */
  public static void binDataCustom(String resultXformName) throws JDMException {
    System.out.println("Custom binning");
    System.out.println("--------------------------------------------");
    // Schema where the original data and resulting transformations reside.
    // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
    String schema =
        m_dmeConn.getConnectionSpec().getName().toUpperCase(java.util.Locale.ROOT);
    // Fully qualified names of the input data set and of the resulting view
    String inputData = schema + "." + "MINING_DATA_BUILD_V";
    String outputData = schema + "." + resultXformName;

    // Numeric custom binning
    OraNumericalAttributeBins[] customNumBins =
        m_binXformFactory.computeNumericBins(
            inputData, // name of the input data set
            new String[] {
              "AGE", "YRS_RESIDENCE",
            },
            new OraNumericalBinningType[] {
              OraNumericalBinningType.equi_width, OraNumericalBinningType.quantile
            },
            new Integer[] {Integer.valueOf(10), Integer.valueOf(5)});

    if (customNumBins == null) {
      System.out.println("Error: no numeric bins were computed");
      return;
    }

    // Categoric custom binning
    OraCategoricalAttributeBins[] customCatBins =
        m_binXformFactory.computeCategoricBins(
            inputData, // name of the input data set
            new String[] {
              "EDUCATION", "OCCUPATION",
            },
            new OraCategoricalBinningType[] {
              OraCategoricalBinningType.top_n, OraCategoricalBinningType.top_n,
            },
            new Integer[] {
              Integer.valueOf(15), Integer.valueOf(10),
            });

    if (customCatBins == null) {
      System.out.println("Error: no categoric bins were computed");
      return;
    }

    // combine custom bins into the single array
    OraAttributeBins[] customBins = null;
    customBins = incrementArray(customBins, (OraAttributeBins[]) customNumBins);
    customBins = incrementArray(customBins, (OraAttributeBins[]) customCatBins);

    // show resulting array of custom bins
    for (int i = 0; i < customBins.length; i++) {
      System.out.println("Attribute:" + customBins[i].getAttributeName());

      if (customBins[i] instanceof OraNumericalAttributeBins) {
        // Numerical attribute: one line per bin with its boundaries
        OraNumericalAttributeBins oraNumBin = (OraNumericalAttributeBins) customBins[i];
        OraNumericalBin[] bs = oraNumBin.getBins();
        System.out.println("\tBin ID\tLower\tUpper");
        for (int j = 0; j < bs.length; j++) {
          System.out.println(
              "\t"
                  + bs[j].getBinID()
                  + "\t"
                  + m_df.format(bs[j].getStartValue())
                  + "\t"
                  + m_df.format(bs[j].getEndValue()));
        }
      } else if (customBins[i] instanceof OraCategoricalAttributeBins) {
        // Categorical attribute: one line per bin with its ';'-separated categories
        OraCategoricalAttributeBins oraCatBin = (OraCategoricalAttributeBins) customBins[i];
        OraCategoricalBin[] bs = oraCatBin.getBins();
        System.out.println("\tBin ID\tCategory");
        for (int j = 0; j < bs.length; j++) {
          Object[] categories = bs[j].getCategories();
          System.out.print("\t" + bs[j].getBinID() + "\t");
          for (int k = 0; k < categories.length; k++) {
            System.out.print(categories[k].toString());
            if (k < categories.length - 1) {
              System.out.print(";");
            }
          }
          System.out.println();
        }
      }
    }
    // Create discretization transformation instance
    OraBinningTransform obt = m_xformFactory.createBinningTransform(customBins);

    // Specify the type of numeric binning: custom
    obt.setNumericalBinningType(OraNumericalBinningType.custom);
    // Specify the type of categoric binning: custom
    obt.setCategoricalBinningType(OraCategoricalBinningType.custom);

    // Raw list kept on purpose: the element type expected by the factory is not visible here.
    ArrayList xformList = new ArrayList();
    xformList.add(obt);
    // Create a transformation sequence object
    OraTransformationSequence xformSeq =
        m_xformFactory.createTransformationSequence(
            inputData, // name of the input data set
            xformList, // list of transformations; here a single custom binning transform
            outputData // name of the transformation result
            );
    String xformSeqName = "bin_" + OraNumericalBinningType.custom.name() + "_xfSeq";
    m_dmeConn.saveObject(xformSeqName, xformSeq, true);

    OraTransformationTask xformTask = m_xformTaskFactory.create(xformSeqName, true);
    executeTask(xformTask, "xCustomBin_jdm");

    // display content of the bin definition tables
    showBinDefinitionTableContents(obt.getCategoricalBinTable(), "categorical");
    showBinDefinitionTableContents(obt.getNumericalBinTable(), "numerical");

    displayDiscretizationResults(
        OraNumericalBinningType.custom,
        outputData,
        new String[] {"AGE", "CUST_INCOME_LEVEL", "EDUCATION", "OCCUPATION"});

    // Bin additional attribute and add
    // to the existing bin definition tables
    // ----------------------------------------------------------------
    // Numeric custom binning
    OraNumericalAttributeBins[] customNumBinsAdd =
        m_binXformFactory.computeNumericBins(
            inputData, // name of the input data set
            new String[] {
              "AFFINITY_CARD",
            },
            new OraNumericalBinningType[] {
              OraNumericalBinningType.quantile,
            },
            new Integer[] {Integer.valueOf(6)});

    // Check the array that was just computed, not the one from the first pass
    if (customNumBinsAdd == null) {
      System.out.println("Error: no numeric bins were computed");
      return;
    }

    // Categoric custom binning
    OraCategoricalAttributeBins[] customCatBinsAdd =
        m_binXformFactory.computeCategoricBins(
            inputData, // name of the input data set
            new String[] {
              "HOUSEHOLD_SIZE",
            },
            new OraCategoricalBinningType[] {
              OraCategoricalBinningType.top_n,
            },
            // One bin count per attribute: a single attribute takes a single entry
            new Integer[] {
              Integer.valueOf(15),
            });

    // Check the array that was just computed, not the one from the first pass
    if (customCatBinsAdd == null) {
      System.out.println("Error: no categoric bins were computed");
      return;
    }

    // combine custom bins into the single array
    OraAttributeBins[] customBinsAdd = null;
    customBinsAdd = incrementArray(customBinsAdd, (OraAttributeBins[]) customNumBinsAdd);
    customBinsAdd = incrementArray(customBinsAdd, (OraAttributeBins[]) customCatBinsAdd);

    // clean up previous view
    dropView(null, resultXformName);

    // Build on the bin definition tables of the first transform and add the new attribute bins
    OraBinningTransform obtAdd =
        m_xformFactory.createBinningTransform(
            obt.getCategoricalBinTable(), obt.getNumericalBinTable(), customBinsAdd);
    obtAdd.setTransformInputData(inputData);
    obtAdd.setTransformOutputData(outputData);

    // Specify the type of numeric binning: custom
    obtAdd.setNumericalBinningType(OraNumericalBinningType.custom);
    // Specify the type of categoric binning: custom
    obtAdd.setCategoricalBinningType(OraCategoricalBinningType.custom);

    OraTransformationTask xformTaskAdd = m_xformTaskFactory.create(obtAdd);
    executeTask(xformTaskAdd, "xCustomBinAdd_jdm");

    // display content of the new bin definition tables
    showBinDefinitionTableContents(obtAdd.getCategoricalBinTable(), "categorical");
    showBinDefinitionTableContents(obtAdd.getNumericalBinTable(), "numerical");

    // show results
    displayDiscretizationResults(
        OraNumericalBinningType.custom,
        outputData,
        new String[] {"AFFINITY_CARD", "HOUSEHOLD_SIZE"});
  }