public static DataItems string2DataItems(String str) { // 将输出结果的字符串转成DataItems的形式 DataItems ans = new DataItems(); if (str == null || str.length() == 0) return ans; String[] temp = str.split("\n"); List<String> dataList = new ArrayList<String>(); List<Double> probList = new ArrayList<Double>(); List<Date> timeList = new ArrayList<Date>(); for (String temp1 : temp) { String[] temp2 = temp1.split("\t\t"); dataList.add(temp2[0]); Double prob = 0.0; try { prob = Double.parseDouble(temp2[1]); } catch (Exception ee) { System.out.println(""); } probList.add(prob); timeList.add(null); } ans.setData(dataList); ans.setProb(probList); ans.setTime(timeList); return ans; }
/**
 * Renders every element of {@code data} as text, one element per line.
 *
 * <p>Each element is written using its {@code toString()} form followed by {@code "\r\n"}.
 *
 * <p>The text is accumulated directly in a {@link StringBuilder}. The previous implementation
 * wrote through a {@code BufferedWriter} wrapped around a {@code StringWriter} and read the
 * {@code StringWriter} before the buffer had been flushed, so buffered text was missing from
 * the returned string.
 *
 * @param data the items to render
 * @return the concatenated lines; an empty string when {@code data} has no elements
 */
public String printFormatData(DataItems data) {
    int numRows = data.getLength();
    StringBuilder out = new StringBuilder();
    for (int row = 0; row < numRows; row++) {
        DataItem s = data.getElementAt(row);
        out.append(s.toString());
        out.append("\r\n");
    }
    return out.toString();
}
// 按照时间,sizeWindow和stepWindow将数据分成项集,再调用Itemset2File写入文件 public static String[] movingdivide(DataItems datainput, TaskElement task, boolean FP) throws IOException { int sizeWindow = (int) ((ParamsSM) task.getMiningParams()).getSizeWindow() * 1000; // seconds int stepWindow = (int) ((ParamsSM) task.getMiningParams()).getStepWindow() * 1000; // seconds int len = datainput.getLength(); List<Date> time = datainput.getTime(); List<String> data = datainput.getData(); List<String> DataSets = new ArrayList<String>(); Date win_start_time = time.get(0); Date win_end_time = getDateAfter(win_start_time, sizeWindow); Date win_start_next = getDateAfter(win_start_time, stepWindow); int ind_next = -1; StringBuilder sb = new StringBuilder(); int i = 0; do { DataItem item = datainput.getElementAt(i); i++; Date date = item.getTime(); String val = item.getData(); if (!date.before(win_start_time) && !date.after(win_end_time)) { if (sb.length() != 0) sb.append(" "); sb.append(val + " -1"); if (!date.before(win_start_next) && ind_next == -1) ind_next = i - 1; } else { sb.append(" -2"); DataSets.add(sb.toString()); sb = new StringBuilder(); if (ind_next == -1) { if (!date.before(getDateAfter(win_end_time, stepWindow))) { win_start_time = date; if (sb.length() != 0) sb.append(" "); sb.append(val + " -1"); } else { win_start_time = win_start_next; // getDateAfter(win_start_time, stepWindow); if (sb.length() != 0) sb.append(" "); sb.append(val + " -1"); } } else { i = ind_next; ind_next = -1; win_start_time = win_start_next; } win_end_time = getDateAfter(win_start_time, sizeWindow); win_start_next = getDateAfter(win_start_time, stepWindow); } } while (i < len); sb.append(" -2"); DataSets.add(sb.toString()); return DataSets.toArray(new String[0]); }
/**
 * Builds a new {@link DataItems} containing the elements of {@code dataInput} in sorted order.
 *
 * <p>The elements are copied into an array, ordered with {@link Arrays#sort(Object[])} and
 * handed to a fresh container; {@code dataInput} itself is only read.
 *
 * @param dataInput the items to sort
 * @return a new container holding the sorted elements
 */
public static DataItems sort(DataItems dataInput) {
    final int count = dataInput.getLength();
    DataItem[] ordered = new DataItem[count];
    int index = 0;
    while (index < count) {
        ordered[index] = dataInput.getElementAt(index);
        index++;
    }
    Arrays.sort(ordered);

    DataItems sorted = new DataItems();
    sorted.setItems(ordered);
    return sorted;
}
/** * 1.按照过滤条件读取数据<br> * 2.按照任务配置里设定的离散化方法,进行离散化<br> * 3.按照任务配置里的时间粒度,对数据进行聚合 * * @param filterCondition 过滤条件 * @param doAggregate 是否按时间对数据进行聚合 * @param doDiscretize 是否考虑离散化 * @return */ private DataItems readInput(String filterCondition, boolean doAggregate, boolean doDiscretize) { String sqlStr = "SELECT 事件发生时间," + task.getMiningObject() + " " + "FROM " + conn.DB_TABLE + " WHERE "; if (task.getFilterCondition().length() > 0) sqlStr += task.getFilterCondition() + " AND "; if (filterCondition != null && filterCondition.length() > 0) sqlStr += filterCondition + " AND "; sqlStr += "1=1 ORDER BY 事件发生时间 asc"; // 按时间先后顺序读取数据 conn.closeConn(); ResultSet rs = conn.sqlQuery(sqlStr); if (rs == null) { return null; } ResultSetMetaData meta = null; int numRecords = 0; try { meta = rs.getMetaData(); int numCols = meta.getColumnCount(); data = new DataItems(); while (rs.next()) { numRecords++; StringBuilder sb = new StringBuilder(); for (int i = 2; i <= numCols; i++) if (rs.getString(i) != null) sb.append(rs.getString(i).trim() + ","); if (sb.length() > 0) { Date d = parseTime(rs.getString(1).trim()); if (d != null) data.add1Data(d, sb.substring(0, sb.length() - 1)); else System.out.println(""); } } rs.close(); } catch (SQLException e) { e.printStackTrace(); } System.out.println("共" + numRecords + "条记录!"); System.out.println("读取完毕:" + data.getLength() + "条记录!"); boolean isNonDouble = !data.isAllDataIsDouble(); // 先进行时间粒度上的聚合 if (doAggregate) data = DataPretreatment.aggregateData( data, task.getGranularity(), task.getAggregateMethod(), isNonDouble); // 再进行离散化(只有数值型才能够离散化,否则应该会报错!) if (doDiscretize) data = DataPretreatment.toDiscreteNumbers( data, task.getDiscreteMethod(), task.getDiscreteDimension(), task.getDiscreteEndNodes()); data.setGranularity(task.getGranularity()); // 设置数据的一些参数,如粒度 String endNodes = data.discreteNodes(); task.setDiscreteEndNodes(endNodes); return data; }
/** * 按照任务配置读取数据,并按照任务配置里的时间粒度,对数据进行聚合 * * @param doAggregate 是否聚合 * @param doDiscretize 是否离散化 * @return */ public DataItems readInput(boolean doAggregate, boolean doDiscretize) { if (UtilsSimulation.instance.isUseSimulatedData()) // 使用 { TextUtils txt = new TextUtils(); txt.setTextPath(task.getSourcePath()); DataItems dataItems = txt.readInput(); boolean isNonDouble = !dataItems.isAllDataIsDouble(); // 离散的或非double型的 if (doAggregate) dataItems = DataPretreatment.aggregateData( dataItems, task.getGranularity(), task.getAggregateMethod(), isNonDouble); if (doDiscretize) dataItems = DataPretreatment.toDiscreteNumbers( dataItems, task.getDiscreteMethod(), task.getDiscreteDimension(), task.getDiscreteEndNodes()); String endNodes = dataItems.discreteNodes(); task.setDiscreteEndNodes(endNodes); return dataItems; } else { Calendar cal = Calendar.getInstance(); cal.set(1, 0, 1, 0, 0, 0); cal.set(Calendar.MILLISECOND, 0); Date dStart; Date dEnd; if (task.getDateStart() == null || task.getDateStart().equals(cal.getTime())) dStart = UtilsSimulation.instance.getStartTime(); else dStart = task.getDateStart(); if (task.getDateEnd() == null || task.getDateEnd().equals(cal.getTime())) dEnd = UtilsSimulation.instance.getCurTime(); else dEnd = task.getDateEnd(); if (dEnd.after(UtilsSimulation.instance.getCurTime())) dEnd = UtilsSimulation.instance.getCurTime(); return readInputBetween(dStart, dEnd); } }
/**
 * Tests for {@code FileNames}: parsing the timestamp embedded in a results file name, picking
 * the most recent file, and creating new result files inside a temporary directory.
 *
 * <p>The class runs with the {@code Theories} runner. Each {@code List<Path>} data point
 * describes the set of files created in the temporary directory before a test runs
 * (presumably the runner passes each of them to the constructor — confirm against the
 * JUnit Theories documentation).
 *
 * @author kasper
 */
@RunWith(Theories.class)
public class FileNamesTest {

    private static final String CONCEPT_LOCALNAME = "testconcept";

    /** A results file together with the timestamp its name is expected to yield. */
    public interface TestData {
        Path getFile();

        Date getExpectedDate();
    }

    /**
     * File-name fixtures. The expected dates are epoch milliseconds; the quoted wall-clock
     * values next to them match those instants at UTC+2.
     *
     * <p>NOTE(review): the tests presumably rely on the JVM default time zone matching that
     * offset — confirm how {@code FileNames.parseDate} resolves the zone.
     */
    public enum DataItems implements TestData {
        @DataPoint
        FILE_JUNE_9TH(
                "results-" + CONCEPT_LOCALNAME + "-19800609-105316.json",
                new Date(329388796000L)), // "9 jun 1980 10:53:16"
        FILE_CONCEPT_OCT_22(
                "results-" + CONCEPT_LOCALNAME + "-20131022-190714.json",
                new Date(1382461634000L)), // "22 oct 2013 19:07:14"
        DIR_FILE_2030(
                "results-" + CONCEPT_LOCALNAME + "-20300407-152345.json",
                new Date(1901798625000L)), // "7 apr 2030 15:23:45"
        DIR_FILE_2030_JUST_BEFORE(
                "results-" + CONCEPT_LOCALNAME + "-20300407-152344.json",
                new Date(1901798624000L));

        private final Path file;
        private final Date expectedDate;

        /** Fixture whose path is just the file name. */
        DataItems(String filename, Date expected) {
            this.file = Paths.get(filename);
            this.expectedDate = expected;
        }

        /** Fixture whose path is the file name resolved against {@code dir}. */
        DataItems(Path dir, String filename, Date expected) {
            this.file = dir.resolve(Paths.get(filename));
            this.expectedDate = expected;
        }

        @Override
        public Path getFile() {
            return file;
        }

        @Override
        public Date getExpectedDate() {
            return expectedDate;
        }
    }

    @DataPoints public static final TestData[] ALL_DATA_ITEMS = DataItems.values();

    /** All fixture files, oldest first. */
    @DataPoint
    public static final List<Path> SEQ_SORTED =
            Arrays.asList(
                    DataItems.FILE_JUNE_9TH.getFile(),
                    DataItems.FILE_CONCEPT_OCT_22.getFile(),
                    DataItems.DIR_FILE_2030_JUST_BEFORE.getFile(),
                    DataItems.DIR_FILE_2030.getFile());

    /** All fixture files, newest first. */
    @DataPoint
    public static final List<Path> SEQ_REVERSED =
            Arrays.asList(
                    DataItems.DIR_FILE_2030.getFile(),
                    DataItems.DIR_FILE_2030_JUST_BEFORE.getFile(),
                    DataItems.FILE_CONCEPT_OCT_22.getFile(),
                    DataItems.FILE_JUNE_9TH.getFile());

    /** Only the newest fixture file. */
    @DataPoint
    public static final List<Path> SINGLE = Arrays.asList(DataItems.DIR_FILE_2030.getFile());

    private final List<Path> filesInTestDir;
    private Concept concept;
    private FileNames testee;
    private Path tmpDir;

    public FileNamesTest(List<Path> filesInTestDir_) {
        this.filesInTestDir = filesInTestDir_;
    }

    /** Creates the mocked concept and a temporary directory populated with the fixture files. */
    @Before
    public void setup() {
        MockitoAnnotations.initMocks(this);
        concept = mockConcept();
        try {
            tmpDir = createTmpDir(filesInTestDir);
            testee = new FileNames(tmpDir);
        } catch (IOException ex) {
            ex.printStackTrace();
            throw new Error("Cannot create tmp dir", ex);
        }
    }

    /** The date parsed from a fixture's file name equals the fixture's expected date. */
    @Theory
    public void testParseDate(TestData data) {
        try {
            assertThat(testee.parseDate(data.getFile()), is(equalTo(data.getExpectedDate())));
        } catch (Exception ex) {
            ex.printStackTrace(System.err);
            throw ex;
        }
    }

    /** Whatever the order of the candidates, the 2030 file is reported as most recent. */
    @Test
    public void testMostRecentFile() {
        try {
            assertThat(
                    testee.getMostRecent(filesInTestDir),
                    is(equalTo(DataItems.DIR_FILE_2030.getFile())));
        } catch (Exception ex) {
            ex.printStackTrace(System.err);
            throw ex;
        }
    }

    @Test
    public void testCreateHtmlFile() throws IOException {
        File htmlFile = testee.createHtmlResultsFile();
        assertCanCreateWritableFile(htmlFile);
    }

    @Test
    public void testCreateJsonFile() throws IOException {
        File jsonFile = testee.createJsonResultsFile(concept);
        assertCanCreateWritableFile(jsonFile);
    }

    /** The most recent JSON file for the concept is the 2030 file inside the temp directory. */
    @Test
    public void testGetMostRecentJson() throws IOException {
        Path actual = testee.getMostRecentJson(concept).toPath();
        Path expected = tmpDir.resolve(DataItems.DIR_FILE_2030.getFile());
        assertThat(actual, is(equalTo(expected)));
    }

    /** Prints the epoch milliseconds of one fixture date; not called by any test here. */
    private void displayDateAsLong() {
        try {
            SimpleDateFormat format = new SimpleDateFormat("d MMM yyyy hh:mm:ss");
            System.out.println(format.parse("7 apr 2030 15:23:45").getTime());
        } catch (ParseException ex) {
            Logger.getLogger(FileNamesTest.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /** Returns a mocked concept whose name is the test local name. */
    private Concept mockConcept() {
        QNameUtil qnames = QNameUtil.instance();
        Concept stub = mock(Concept.class);
        try {
            when(stub.getName())
                    .thenReturn(qnames.createQNameInPreconstructedNamespace(CONCEPT_LOCALNAME));
        } catch (Exception ex) {
            throw new Error(ex);
        }
        return stub;
    }

    /**
     * Fails unless {@code destination} can be created as a new file and written to. The file
     * is deleted again once it has been created.
     */
    private static void assertCanCreateWritableFile(File destination) throws IOException {
        if (!destination.createNewFile()) {
            fail(String.format("Cannot create file: %s", destination));
            return;
        }
        try {
            if (!destination.canWrite()) {
                fail(String.format("Cannot write to %s", destination));
                return;
            }
            try (FileWriter writer = new FileWriter(destination)) {
                writer.append("dummy test output");
            }
        } finally {
            destination.delete();
        }
    }

    /** Creates a temporary directory containing an empty file for every entry of {@code files}. */
    private static Path createTmpDir(List<Path> files) throws IOException {
        Path root = Files.createTempDirectory("eligibilityToolTestFileNames");
        root.toFile().deleteOnExit();
        for (Path relative : files) {
            touchTmpFile(root, relative);
        }
        return root;
    }

    /** Creates {@code file} (resolved against {@code tmpDir}) if it does not exist yet. */
    private static void touchTmpFile(Path tmpDir, Path file) throws IOException {
        try {
            Path target = tmpDir.resolve(file);
            ensureCanWriteTmpFiles(target.getParent());
            File onDisk = target.toFile();
            if (onDisk.createNewFile()) {
                onDisk.deleteOnExit();
            }
        } catch (IOException ex) {
            System.out.println(tmpDir);
            System.out.println(file);
            throw ex;
        }
    }

    /** Creates a single directory after making sure its parent exists and is writable. */
    private static void createTmpDir(Path tmpDir) throws IOException {
        ensureCanWriteTmpFiles(tmpDir.getParent());
        File directory = tmpDir.toFile();
        boolean created = directory.mkdir();
        System.out.println("mkdir: " + tmpDir);
        if (created) {
            directory.deleteOnExit();
        }
    }

    /**
     * Makes sure {@code dir} exists (creating it and, recursively, its parents when missing),
     * is a directory and is writable.
     */
    private static void ensureCanWriteTmpFiles(Path dir) throws IOException {
        File directory = dir.toFile();
        if (!directory.exists()) {
            createTmpDir(dir);
        }
        if (!directory.isDirectory()) {
            throw new NotDirectoryException(dir.toString());
        }
        if (!directory.canWrite()) {
            throw new AccessDeniedException(dir.toString());
        }
    }
}