/** * 1.按照过滤条件读取数据<br> * 2.按照任务配置里设定的离散化方法,进行离散化<br> * 3.按照任务配置里的时间粒度,对数据进行聚合 * * @param filterCondition 过滤条件 * @param doAggregate 是否按时间对数据进行聚合 * @param doDiscretize 是否考虑离散化 * @return */ private DataItems readInput(String filterCondition, boolean doAggregate, boolean doDiscretize) { String sqlStr = "SELECT 事件发生时间," + task.getMiningObject() + " " + "FROM " + conn.DB_TABLE + " WHERE "; if (task.getFilterCondition().length() > 0) sqlStr += task.getFilterCondition() + " AND "; if (filterCondition != null && filterCondition.length() > 0) sqlStr += filterCondition + " AND "; sqlStr += "1=1 ORDER BY 事件发生时间 asc"; // 按时间先后顺序读取数据 conn.closeConn(); ResultSet rs = conn.sqlQuery(sqlStr); if (rs == null) { return null; } ResultSetMetaData meta = null; int numRecords = 0; try { meta = rs.getMetaData(); int numCols = meta.getColumnCount(); data = new DataItems(); while (rs.next()) { numRecords++; StringBuilder sb = new StringBuilder(); for (int i = 2; i <= numCols; i++) if (rs.getString(i) != null) sb.append(rs.getString(i).trim() + ","); if (sb.length() > 0) { Date d = parseTime(rs.getString(1).trim()); if (d != null) data.add1Data(d, sb.substring(0, sb.length() - 1)); else System.out.println(""); } } rs.close(); } catch (SQLException e) { e.printStackTrace(); } System.out.println("共" + numRecords + "条记录!"); System.out.println("读取完毕:" + data.getLength() + "条记录!"); boolean isNonDouble = !data.isAllDataIsDouble(); // 先进行时间粒度上的聚合 if (doAggregate) data = DataPretreatment.aggregateData( data, task.getGranularity(), task.getAggregateMethod(), isNonDouble); // 再进行离散化(只有数值型才能够离散化,否则应该会报错!) if (doDiscretize) data = DataPretreatment.toDiscreteNumbers( data, task.getDiscreteMethod(), task.getDiscreteDimension(), task.getDiscreteEndNodes()); data.setGranularity(task.getGranularity()); // 设置数据的一些参数,如粒度 String endNodes = data.discreteNodes(); task.setDiscreteEndNodes(endNodes); return data; }