@Override protected void _transform(Dataset data, boolean trainingMode) { // handle non-numeric types, extract columns to dummy variables Map<Object, Dataset.ColumnType> newColumns = new HashMap<>(); int n = data.size(); Iterator<Map.Entry<Object, Dataset.ColumnType>> it = data.getColumns().entrySet().iterator(); while (it.hasNext()) { Map.Entry<Object, Dataset.ColumnType> entry = it.next(); Object column = entry.getKey(); Dataset.ColumnType columnType = entry.getValue(); if (columnType == Dataset.ColumnType.CATEGORICAL || columnType == Dataset.ColumnType .ORDINAL) { // ordinal and categorical are converted into dummyvars // Remove the old column from the column map it.remove(); // create dummy variables for all the levels for (Record r : data) { if (!r.getX().containsKey(column)) { continue; // does not contain column } Object value = r.getX().get(column); // remove the column from data r.getX().remove(column); List<Object> newColumn = Arrays.<Object>asList(column, value); // add a new boolean feature with combination of column and value r.getX().put(newColumn, true); // add the new column in the list for insertion newColumns.put(newColumn, Dataset.ColumnType.DUMMYVAR); } } } // add the new columns in the dataset column map if (!newColumns.isEmpty()) { data.getColumns().putAll(newColumns); } }