public boolean imp() { boolean res = false; DbVariable dbVariable = null; String fullFileName = ""; String errMessage = null; DbImportFile dbInFile = new DbImportFile(); try { dbVariable = new DbVariable(); // Store the file on server filesystem fullFileName = dbInFile.storeImportFileBLOB(conn_viss, ifid); FileParser fileParser = new FileParser(fullFileName); fileParser.Parse( FileTypeDefinitionList.matchingDefinitions( FileTypeDefinition.VARIABLE, FileTypeDefinition.LIST)); dbVariable.CreateVariables(fileParser, connection, sampleUnitId, Integer.parseInt(userId)); errMessage = dbVariable.getErrorMessage(); Assertion.assertMsg(errMessage == null || errMessage.trim().equals(""), errMessage); dbInFile.setStatus(conn_viss, ifid, "IMPORTED"); // dbInFile.UpdateImportFile(connection,null,null,"Done",Integer.parseInt(ifid),Integer.parseInt(userId)); // Add a message to the log dbInFile.addErrMsg( conn_viss, ifid, "File imported to sampling unit " + DbSamplingUnit.getSUName(conn_viss, Integer.toString(sampleUnitId)) + "Note: Markers is always imported in Create mode."); res = true; } catch (Exception e) { Errors.logError("ImportVariables.imp(...)" + e.getMessage()); dbInFile.setStatus(conn_viss, ifid, "ERROR"); // dbInFile.UpdateImportFile(connection,null,null,e.getMessage(),Integer.parseInt(ifid),Integer.parseInt(userId)); // Add a message to the log dbInFile.addErrMsg(conn_viss, ifid, e.getMessage()); e.printStackTrace(System.err); if (errMessage == null) { errMessage = e.getMessage(); } } finally { try { /* * Delete files uploaded */ File tmp = new File(fullFileName); tmp.delete(); } catch (Exception ignore) { } } return res; }
@Test public void test() throws IOException { String rootPath = "data/"; FileParser parser = new FileParser(); String absOutputPath = parser.setOutputPath(rootPath); System.out.println("Output path: " + absOutputPath); // Assert.assertEquals("", absOutputPath); int count = parser.parse(new File(rootPath + "wsj_0600.tok")); Assert.assertEquals(2, count); count = parser.parse(new File(rootPath + "file2.tok")); Assert.assertEquals(24, count); Assert.assertEquals(2, parser.docCount); }
/**
 * Parses {@code testfile.xlsx} from the classpath with an {@code XlsxFileParser} and checks
 * that the row listener was invoked once.
 *
 * <p>The test fails immediately with a descriptive message when the resource is missing,
 * instead of handing a {@code null} stream to the parser.
 */
@Test
public void testParse() throws ParsingFailedException, IOException {
  try (InputStream stream =
      ClassLoader.getSystemClassLoader().getResourceAsStream("testfile.xlsx")) {
    // getResourceAsStream returns null (it does not throw) when the resource is absent.
    Assert.assertNotNull("Test resource testfile.xlsx was not found on the classpath", stream);

    FileParser parser = new XlsxFileParser();
    parser.setRowParsedListener(
        new RowParsedListener() {
          @Override
          public void parsed(final Row row) {
            parsedLines++;
          }
        });
    parser.parse(stream);

    Assert.assertEquals(1, parsedLines, 0.1);
  }
}
/**
 * Wraps the current field name in a {@code span} when the line being parsed contains it.
 *
 * <p>Nothing happens unless field names are available and the parser's currently pending class
 * is this object's class. When the field name is found on the line, the opening tag is built,
 * the matching element is wrapped, and processing advances to the next field.
 *
 * @param fileParser parser positioned on the line to inspect
 */
public void writeCoverageInfoIfLineStartsANewFieldDeclaration(FileParser fileParser) {
  if (classAndFieldNames == null || !className.equals(fileParser.getCurrentlyPendingClass())) {
    return;
  }

  LineElement fieldElement = fileParser.lineParser.getInitialElement().findWord(fieldName);
  if (fieldElement == null) {
    return;
  }

  buildOpeningTagForFieldWrapper();
  fieldElement.wrapText(openingTag.toString(), "</span>");
  moveToNextField();
}
/**
 * Looks up a name in a record file and prints the phone number found for it.
 *
 * <p>Arguments: {@code [0]} = name to search for, {@code [1]} = the path of the file to search.
 * With any other number of arguments a usage message is written to standard error and the
 * process exits with status 1.
 */
public static void main(String args[]) {
  if (args.length != 2) {
    String usage =
        "\nYou have entered an invalid number of arguments."
            + "\n\nPlease use the name to search for followed by file to search through.";
    System.err.println(usage);
    System.exit(1);
  }

  final String searchName = args[0];
  final String filePath = args[1];
  System.out.println("\nSearching for '" + searchName + "' in " + filePath + "...");

  ArrayList<Record> records = FileParser.Parse(filePath);
  String foundNumber = BinarySearchAlgorithm.pullKeyValue(searchName, records);

  String outcome =
      (foundNumber != null)
          ? "\nRetrieved " + foundNumber + " for " + searchName + ".\n"
          : "\nNo record was found for " + searchName + ".";
  System.out.println(outcome);
}
public boolean check() { Errors.logDebug("CheckVariables started"); boolean res = false; DbImportFile dbInFile = new DbImportFile(); String fullFileName = ""; String checkFileName = ""; String errMessage = null; try { // Create the variable DbVariable dbVariable = new DbVariable(); dbInFile.setStatus(conn_viss, ifid, "0%"); // Store the file on server filesystem fullFileName = dbInFile.storeImportFileBLOB(conn_viss, ifid); checkFileName = fullFileName + "_checked"; FileHeader header = FileParser.scanFileHeader(fullFileName); String type = header.formatTypeName().toUpperCase(); char delimiter = header.delimiter().charValue(); FileParser fileParser = new FileParser(fullFileName); fileParser.Parse( FileTypeDefinitionList.matchingDefinitions( FileTypeDefinition.VARIABLE, FileTypeDefinition.LIST)); // Write out the result to a new file FileWriter fileOut = new FileWriter(checkFileName); fileOut.write( header.objectTypeName() + "/" + header.formatTypeName() + "/" + header.version() + "/" + header.delimiter() + "\n"); String titles[] = fileParser.columnTitles(); for (int j = 0; j < titles.length; j++) { fileOut.write(titles[j] + delimiter); } fileOut.write("\n"); String errorMsg = ""; errorMsg = checkList(fileParser, fileOut, delimiter); fileOut.close(); /* * Save the file to database */ dbInFile.saveCheckedFile(conn_viss, ifid, checkFileName); if (errorMsg.startsWith("ERROR:")) { res = false; dbInFile.setStatus(conn_viss, ifid, "ERROR"); // Add a message to the log dbInFile.addErrMsg( conn_viss, ifid, "File checked failed for sampling unit " + DbSamplingUnit.getSUName(conn_viss, Integer.toString(sampleUnitId)) + ". 
<br>" + errorMsg); Errors.logDebug("ErrorMsg=" + errorMsg); } else if (errorMsg.startsWith("WARNING:")) { dbInFile.setStatus(conn_viss, ifid, "WARNING"); res = true; // Add a message to the log dbInFile.addErrMsg(conn_viss, ifid, "Warnings exists: " + errorMsg); } else { dbInFile.setStatus(conn_viss, ifid, "CHECKED"); res = true; // Add a message to the log dbInFile.addErrMsg( conn_viss, ifid, "File checked for sampling unit " + DbSamplingUnit.getSUName(conn_viss, Integer.toString(sampleUnitId)) + ".<br>" + errorMsg); } /* if (errorMsg.length()>0) { res = false; dbInFile.setStatus(conn_viss,ifid,"ERROR"); // Add a message to the log dbInFile.addErrMsg(conn_viss,ifid,"File failed the check for sampling unit "+DbSamplingUnit.getSUName(conn_viss,Integer.toString(sampleUnitId)) +"<br>"+errorMsg); } else { res = true; dbInFile.setStatus(conn_viss,ifid,"CHECKED"); // Add a message to the log dbInFile.addErrMsg(conn_viss,ifid,"File checked for sampling unit "+DbSamplingUnit.getSUName(conn_viss,Integer.toString(sampleUnitId)) +"<br>"+errorMsg); }*/ } catch (Exception e) { dbInFile.setStatus(conn_viss, ifid, "ERROR"); // Add a message to the log dbInFile.addErrMsg(conn_viss, ifid, e.getMessage()); e.printStackTrace(System.err); if (errMessage == null) { errMessage = e.getMessage(); } } finally { try { /* * Delete files uploaded */ File tmp = new File(checkFileName); tmp.delete(); tmp = new File(fullFileName); tmp.delete(); } catch (Exception ignore) { } } Errors.logDebug("CheckVariables completed"); return res; }
/**
 * Command-line entry point: {@code <input> <output> [<error log>]}.
 *
 * <p>With fewer than two arguments the help message is printed. The error log defaults to
 * {@code ERROR.log}. For each file whose type is not the expected one (DAT, VMR, LOG) the user
 * is asked on standard input whether to continue; any answer other than {@code y}/{@code Y}
 * (or end of input) aborts. The parser is then executed and the number of errors reported; the
 * error file is deleted when there were none.
 *
 * @param args input file name, output file name and optional error file name
 */
public static void main(String[] args) {
  if (args.length < 2) {
    System.out.println(Helper.message());
    return;
  }

  File input = new File(args[0]);
  File output = new File(args[1]);
  File error = new File(args.length < 3 ? "ERROR.log" : args[2]);

  Scanner sc = new Scanner(System.in);
  try {
    // Short-circuit evaluation keeps the original prompt order and stops at the first "no".
    boolean proceed =
        confirmFileType(sc, input, "DAT", "Input")
            && confirmFileType(sc, output, "VMR", "Output")
            && confirmFileType(sc, error, "LOG", "Error");
    if (!proceed) {
      System.out.println(" Aborting...");
      return;
    }

    FileParser parser = new FileParser(input, output, error);
    try {
      int numErrors = parser.execute();
      if (numErrors == 0) {
        System.out.println("No errors during execution.");
        if (error.exists()) {
          error.delete();
        }
      } else {
        System.out.println(
            "There were " + numErrors + " errors during execution, see " + error.getName() + ".");
      }
    } catch (FileParserException e) {
      System.out.println(" Fatal error during execution:");
      System.out.println(" " + e.getMessage());
    }
  } finally {
    // Closed on every path, including unexpected runtime exceptions.
    sc.close();
  }
}

/**
 * Returns whether processing may continue for {@code file}: either its type matches {@code
 * expectedType} (case-insensitively) or the user confirms with {@code y}/{@code Y}. End of
 * input counts as a refusal.
 */
private static boolean confirmFileType(Scanner sc, File file, String expectedType, String role) {
  if (expectedType.equalsIgnoreCase(getFileType(file))) {
    return true;
  }
  System.out.print(
      " " + role + " file type is not " + expectedType + ". Do you want to continue (Y/N) ? ");
  return sc.hasNextLine() && "y".equalsIgnoreCase(sc.nextLine());
}
public boolean imp() { boolean res = false; String errMessage = null; DbImportFile dbInFile = new DbImportFile(); DbPhenotype dbp = new DbPhenotype(); String fullFileName = null; try { Errors.logInfo("CheckPhenotype started"); // connection.setAutoCommit(false); dbInFile.setStatus(conn_viss, ifid, "0%"); fullFileName = dbInFile.storeImportFileBLOB(conn_viss, ifid); FileHeader header = FileParser.scanFileHeader(fullFileName); FileParser fileParser = new FileParser(fullFileName); // Set status dbInFile.setStatus(conn_viss, ifid, "10%"); // Ensure file format is list or matrix Assertion.assertMsg( header.formatTypeName().equalsIgnoreCase(FileTypeDefinition.LIST) || header.formatTypeName().equalsIgnoreCase(FileTypeDefinition.MATRIX), "Format type name should be list or matrix " + "but found found " + header.formatTypeName()); // If file is a list if (header.formatTypeName().equalsIgnoreCase(FileTypeDefinition.LIST)) { fileParser.Parse( FileTypeDefinitionList.matchingDefinitions( FileTypeDefinition.PHENOTYPE, FileTypeDefinition.LIST)); dbInFile.setStatus(conn_viss, ifid, "20%"); if (updateMethod.equals("CREATE")) { dbp.CreatePhenotypesList( fileParser, connection, sampleUnitId, Integer.valueOf(userId).intValue()); } else if (updateMethod.equals("UPDATE")) { dbp.UpdatePhenotypesList( fileParser, connection, sampleUnitId, Integer.valueOf(userId).intValue()); } else if (updateMethod.equals("CREATE_OR_UPDATE")) { dbp.CreateOrUpdatePhenotypesList( fileParser, connection, sampleUnitId, Integer.valueOf(userId).intValue()); } } // If file is a matrix else if (header.formatTypeName().equalsIgnoreCase(FileTypeDefinition.MATRIX)) { fileParser.Parse( FileTypeDefinitionList.matchingDefinitions( FileTypeDefinition.PHENOTYPE, FileTypeDefinition.MATRIX)); dbInFile.setStatus(conn_viss, ifid, "20%"); if (updateMethod.equals("CREATE")) { dbp.CreatePhenotypesMatrix( fileParser, connection, sampleUnitId, Integer.valueOf(userId).intValue()); } else if (updateMethod.equals("UPDATE")) { 
dbp.UpdatePhenotypesMatrix( fileParser, connection, sampleUnitId, Integer.valueOf(userId).intValue()); } else if (updateMethod.equals("CREATE_OR_UPDATE")) { dbp.CreateOrUpdatePhenotypesMatrix( fileParser, connection, sampleUnitId, Integer.valueOf(userId).intValue()); } } errMessage = dbp.getErrorMessage(); Assertion.assertMsg(errMessage == null || errMessage.trim().equals(""), errMessage); dbInFile.setStatus(conn_viss, ifid, "IMPORTED"); // Add a message to the log dbInFile.addErrMsg( conn_viss, ifid, "File imported for sampling unit " + DbSamplingUnit.getSUName(conn_viss, Integer.toString(sampleUnitId))); res = true; Errors.logInfo("Check Phenotype ended"); } catch (Exception e) { // Flag for error and set the errMessage if it has not been set // isOk = false; dbInFile.setStatus(conn_viss, ifid, "ERROR"); // Add a message to the log dbInFile.addErrMsg(conn_viss, ifid, e.getMessage()); e.printStackTrace(System.err); if (errMessage == null) { errMessage = e.getMessage(); } } return res; }
public boolean check() { Errors.logDebug("CheckPhenotype started"); boolean res = false; String errMessage = null; FileWriter fileOut = null; DbImportFile dbInFile = new DbImportFile(); String fullFileName = ""; String checkFileName = ""; try { dbInFile.setStatus(conn_viss, ifid, "0%"); fullFileName = dbInFile.storeImportFileBLOB(conn_viss, ifid); checkFileName = fullFileName + "_checked"; // Create the Phenotype DbPhenotype dbPhenotype = new DbPhenotype(); FileHeader header = FileParser.scanFileHeader(fullFileName); String type = header.formatTypeName().toUpperCase(); char delimiter = header.delimiter().charValue(); // AbstractValueFileParser fp = null; FileParser fp = null; if (type.equals("LIST")) { fp = new FileParser(fullFileName); fp.Parse( FileTypeDefinitionList.matchingDefinitions( FileTypeDefinition.PHENOTYPE, FileTypeDefinition.LIST)); } else if (type.equals("MATRIX")) { fp = new FileParser(fullFileName); fp.Parse( FileTypeDefinitionList.matchingDefinitions( FileTypeDefinition.PHENOTYPE, FileTypeDefinition.MATRIX)); } // Write out the result to a new file fileOut = new FileWriter(checkFileName); fileOut.write( header.objectTypeName() + "/" + header.formatTypeName() + "/" + header.version() + "/" + header.delimiter() + "\n"); String titles[] = fp.columnTitles(); for (int j = 0; j < titles.length; j++) { fileOut.write(titles[j] + delimiter); } fileOut.write("\n"); // Garbage collect the unused variables header = null; // fullFileName = null; // checkFileName = null; // Fix to upper case updateMethod = updateMethod.toUpperCase(); Vector fatalErrors = new Vector(); if (type.equals("LIST")) checkListTitles(titles, fatalErrors); else checkMatrixTitles(titles, fatalErrors); writeTitleErrors(fileOut, fatalErrors); String indId; if (titles[0].equals("IDENTITY")) indId = "IDENTITY"; else indId = "Alias"; String errMsg = ""; if (type.equals("LIST")) errMsg = checkList((FileParser) fp, fileOut, delimiter, indId); // errMsg = checkList((FileParser)fp, 
fatalErrors,fileOut,delimiter,indId); else if (type.equals("MATRIX")) errMsg = checkMatrix((FileParser) fp, fatalErrors, fileOut, delimiter, indId); // Close the file fileOut.close(); /* * Save the file to database */ dbInFile.saveCheckedFile(conn_viss, ifid, checkFileName); // Get the error message from the database object. If it is set an // error occured during the operation so an error is thrown. // errMessage = dbIndividual.getErrorMessage(); // Assertion.assertMsg(errMessage == null || // errMessage.trim().equals(""), errMessage); if (errMsg.startsWith("ERROR:")) { dbInFile.setStatus(conn_viss, ifid, "ERROR"); res = false; } else if (errMsg.startsWith("WARNING:")) { dbInFile.setStatus(conn_viss, ifid, "WARNING"); res = true; } else { dbInFile.setStatus(conn_viss, ifid, "CHECKED"); res = true; } // Add a message to the log dbInFile.addErrMsg( conn_viss, ifid, "File checked for sampling unit " + DbSamplingUnit.getSUName(conn_viss, Integer.toString(sampleUnitId)) + "<br>\n" + errMsg); } catch (Exception e) { // Flag for error and set the errMessage if it has not been set // isOk = false; dbInFile.setStatus(conn_viss, ifid, "ERROR"); // dbInFile.UpdateImportFile(connection,null,null,e.getMessage(),Integer.parseInt(ifid),Integer.parseInt(userId)); // Add a message to the log dbInFile.addErrMsg(conn_viss, ifid, e.getMessage()); e.printStackTrace(System.err); if (errMessage == null) { errMessage = e.getMessage(); } } finally { try { /* * Delete temporary file */ File tmp = new File(fullFileName); tmp.delete(); tmp = null; tmp = new File(checkFileName); tmp.delete(); tmp = null; } catch (Exception ignore) { } } Errors.logDebug("CheckPhenotype completed"); return res; }
/**
 * Checks every data row of a matrix-format file and writes the findings to {@code fileOut}.
 *
 * <p>The first column identifies the individual; every remaining column title is treated as a
 * variable name. Each cell is passed to {@code checkValues} and then to the check selected by
 * {@code updateMethod}. After each row the collected errors, warnings and deviations are
 * counted and written with {@code writeMatrixErrors}, and the import status is refreshed
 * whenever progress has advanced by more than five percentage points.
 *
 * @param fp parsed import file
 * @param fatalErrors not used by this method
 * @param fileOut writer for the checked copy of the file
 * @param delimiter column delimiter of the import file
 * @param indId title of the column holding the individual's identifier
 * @return a summary starting with {@code ERROR:} or {@code WARNING:} when errors or warnings
 *     were found, followed by the deviation, warning and error counts
 */
public String checkMatrix(
    FileParser fp, Vector fatalErrors, FileWriter fileOut, char delimiter, String indId) {
  int errorCount = 0;
  int warningCount = 0;
  int deviationCount = 0;

  DbImportFile importFile = new DbImportFile();
  double lastReported = 0.0;

  int totalRows = fp.dataRows();
  String[] titles = fp.columnTitles();

  // Every column after the first one is a variable.
  String[] variables = new String[titles.length - 1];
  System.arraycopy(titles, 1, variables, 0, variables.length);

  warningList = new ArrayList();
  errorList = new ArrayList();

  // Declared outside the loops: the values of the last cell visited are passed on to
  // writeMatrixErrors after each row.
  String ind = "";
  String variable = "";
  String value = "";

  int row = 0;
  while (row < fp.dataRows()) {
    Vector deviationMessages = new Vector();
    Vector databaseValues = new Vector();
    Vector values = new Vector();

    ind = fp.getValue(indId, row);

    // check the whole row
    for (int col = 0; col < variables.length; col++) {
      variable = variables[col];
      value = fp.getValue(variable, row);

      // Add the values for error writing
      values.add(value);

      // check that values exist, have correct length etc
      checkValues(ind, variable, value, null, null, null);

      if (updateMethod.equals("CREATE")) {
        checkCreate(titles[0], ind, variable, value, null, null, null, sampleUnitId);
      } else if (updateMethod.equals("UPDATE")) {
        checkUpdate(
            titles[0],
            ind,
            variable,
            value,
            null,
            null,
            null,
            sampleUnitId,
            deviationMessages,
            databaseValues,
            delimiter);
      } else if (updateMethod.equals("CREATE_OR_UPDATE")) {
        checkCreateOrUpdate(
            titles[0],
            ind,
            variable,
            value,
            null,
            null,
            null,
            sampleUnitId,
            deviationMessages,
            databaseValues,
            delimiter);
      }
    }

    errorCount += errorList.size();
    deviationCount += deviationMessages.size();
    warningCount += warningList.size();

    writeMatrixErrors(
        fileOut, deviationMessages, databaseValues, values, ind, delimiter, variable, value);

    // Set the status of the import, visible to the user
    double progress = row * 100 / (double) totalRows;
    if (progress > lastReported + 5) {
      lastReported = progress;
      importFile.setStatus(conn_viss, ifid, Integer.toString((int) progress) + "%");
    }

    errorList.clear();
    warningList.clear();
    row++;
  }

  String verdict;
  if (errorCount > 0) {
    verdict = "ERROR: Import of the genotypes failed.";
  } else if (warningCount > 0) {
    verdict = "WARNING: Some warnings exist in the import file";
  } else {
    verdict = "Genotype file is correct";
  }

  return verdict
      + "\nDeviations:"
      + deviationCount
      + "\nWarnings:"
      + warningCount
      + "\nErrors:"
      + errorCount;
}
// public String checkList(FileParser fp, Vector errorMessages, FileWriter fileOut, char // delimiter, String indId) public String checkList(FileParser fp, FileWriter fileOut, char delimiter, String indId) { // String ind, marker, allele1, allele2, raw1, raw2, ref, comm; String ind, variable, value, date, ref, comm; String errMsg = ""; int nrErrors = 0; int nrWarnings = 0; int nrDeviations = 0; int dataRows = fp.dataRows(); String titles[] = fp.columnTitles(); DbImportFile dbInFile = new DbImportFile(); String statusStr; double status; double status_last = 0.0; warningList = new ArrayList(); errorList = new ArrayList(); for (int i = 0; i < dataRows; i++) { // Vector errorMessages = new Vector(); // Vector warningMessages = new Vector(); Vector deviationMessages = new Vector(); Vector databaseValues = new Vector(); ind = ((FileParser) fp).getValue(indId, i).trim(); variable = ((FileParser) fp).getValue("VARIABLE", i).trim(); value = ((FileParser) fp).getValue("VALUE", i).trim(); date = ((FileParser) fp).getValue("DATE", i).trim(); ref = ((FileParser) fp).getValue("REF", i).trim(); comm = ((FileParser) fp).getValue("COMMENT", i).trim(); // Check for valid data values. // Check for length, remove null and so on. // Syntax check. 
checkValues(ind, variable, value, date, ref, comm); // checkValues(ind, variable, value, date, ref, comm, fatalErrors); // If create updateMethod if (updateMethod == null || updateMethod.equals("CREATE")) checkCreate(titles[0], ind, variable, value, date, ref, comm, sampleUnitId); // If update updateMethod else if (updateMethod.equals("UPDATE")) checkUpdate( titles[0], ind, variable, value, date, ref, comm, sampleUnitId, deviationMessages, databaseValues, delimiter); // if both update and add else if (updateMethod.equals("CREATE_OR_UPDATE")) checkCreateOrUpdate( titles[0], ind, variable, value, date, ref, comm, sampleUnitId, deviationMessages, databaseValues, delimiter); nrErrors += errorList.size(); nrDeviations += deviationMessages.size(); nrWarnings += warningList.size(); // write row + all errors encountered to file writeListErrors( fileOut, deviationMessages, databaseValues, ind, delimiter, variable, value, date, ref, comm); /* * Set the status of the import, visible to the user */ status = (new Double(i * 100 / (1.0 * dataRows))).doubleValue(); if (status_last + 5 < status) { status_last = status; statusStr = Integer.toString((new Double(status)).intValue()) + "%"; dbInFile.setStatus(conn_viss, ifid, statusStr); } errorList.clear(); warningList.clear(); } if (nrErrors > 0) errMsg = "ERROR: Import of the Phenotypes failed."; else if (nrWarnings > 0) errMsg = "WARNING: Some warnings exist in the import file"; else errMsg = "Phenotype file is correct"; errMsg += "\nDeviations:" + nrDeviations + "\nWarnings:" + nrWarnings + "\nErrors:" + nrErrors; return errMsg; }
/**
 * Runs the full training procedure for one data split.
 *
 * <p>Phase 1 (initialization): for every time step {@code t0 <= t < T} the training CSV
 * {@code ../../data_sm/nips_17/out/<seed>/<t>.train.csv} is read into the graph matrices
 * {@code G} and {@code A}, the per-step parameter matrices are randomly initialized, and
 * everything is appended to the corresponding static lists. Zero-filled gradient buffers are
 * appended afterwards.
 *
 * <p>Phase 2 (optimization): for up to {@code MAX_ITER} outer iterations the method alternates
 * between the "intrinsic" part (forward1/backward1/compute_gradient1, updating {@code h_hat_s})
 * and the "impression" part (forward2/backward2/compute_gradient2, updating
 * {@code h_hat_prime_s}). Each part runs an inner loop of at most {@code INNER_ITER} steps and
 * then draws new samples for {@code h_s} / {@code h_prime_s}. The sampled matrices are written
 * to {@code ./res/<seed>_<delta_str>/} after every outer iteration, and the loop stops early
 * once both relative objective changes fall below {@code 1e-6}.
 *
 * @param seed name of the data split; used in the input path and in the output directory name
 */
public static void test1(String seed) {
  /* read, init data & parameters */
  for (int t = t0; t < T; t++) {
    // String fileDir = "../../data/graph/" + Integer.toString(t) + ".csv"; // original
    // co-voting dataset
    // String fileDir = "./data/" + Integer.toString(t) + ".csv"; // artificial toy dataset
    String fileDir =
        "../../data_sm/nips_17/out/"
            + seed
            + "/"
            + Integer.toString(t)
            + ".train.csv"; // nips dataset (smaller)
    Map<Integer, Double> freq = FileParser.readCSVDict(fileDir);

    double[][] G = new double[n][n];
    double[][] A = new double[n][n];
    double[][] mu = new double[n][K];
    double[][] mu_hat = new double[n][K];
    double[][] mu_prime = new double[n][K];
    double[][] mu_hat_prime = new double[n][K];
    double[][] h = new double[n][K];
    double[][] h_hat = new double[n][K];

    // G and A are passed in as freshly allocated n x n arrays - presumably filled in place by
    // readCSVGraph (TODO confirm).
    FileParser.readCSVGraph(fileDir, freq, G, A);

    // Random initialization, uniform around zero. The order of the rand.nextDouble() calls
    // determines the values for a given random seed, so it must not be changed.
    for (int i = 0; i < n; i++)
      for (int k = 0; k < K; k++) {
        mu[i][k] = scale_0 * (rand.nextDouble() - 0.5);
        mu_hat[i][k] = scale_0 * (rand.nextDouble() - 0.5);
        mu_prime[i][k] = mu[i][k];
        mu_hat_prime[i][k] = mu_hat[i][k];
        h[i][k] = scale * (rand.nextDouble() - 0.5);
        h_hat[i][k] = scale * (rand.nextDouble() - 0.5);
      }

    GS.add(G);
    AS.add(A);
    mu_s.add(mu);
    mu_hat_s.add(mu_hat);
    mu_prime_s.add(mu_prime);
    mu_hat_prime_s.add(mu_hat_prime);
    // NOTE(review): the SAME array object h is added to h_s, h_prime_s and h_hat_prime_s, so
    // in-place updates of h_hat_prime_s entries also change the initial h_s / h_prime_s entries
    // until those are replaced by freshly sampled arrays. Marked "for test" by the author -
    // confirm this aliasing is intended.
    h_s.add(h);
    h_prime_s.add(h);
    h_hat_s.add(h_hat);
    h_hat_prime_s.add(h); /* for test */
    delta_s.add(delta);
    delta_prime_s.add(delta);
    // TODO previous: 0.1
    v_s.add(0.1);
    v_hat_s.add(0.1);
    v_prime_s.add(0.1);
    v_hat_prime_s.add(0.1);

    System.out.println("done! t = " + t);
  }

  // Gradient buffers: (T - t0)^2 entries for the mu gradients (read elsewhere with index
  // t * (T - t0) + s) and (T - t0) entries for the h_hat gradients.
  for (int t = t0; t < T; t++) {
    for (int s = t0; s < T; s++) {
      grad_mu_s.add(new double[n][K]);
      grad_mu_hat_s.add(new double[n][K]);
      grad_mu_prime_s.add(new double[n][K]);
      grad_mu_hat_prime_s.add(new double[n][K]);
    }
    grad_h_hat_s.add(new double[n][K]);
    grad_h_hat_prime_s.add(new double[n][K]);
  }
  /* end initialization */

  /* outer for-loop */
  double old_obj_1 = -1, old_obj_2 = -1;
  for (int iter = 0; iter < MAX_ITER; iter++) {
    // Scanner sc = new Scanner(System.in); int gu; gu = sc.nextInt();
    System.out.println("====== iter = " + iter + " ======");

    /** intrinsic feature * */
    forward1(true, iter);
    backward1(true);
    // The gradient is computed once per outer iteration; the inner loop below reuses it.
    compute_gradient1(iter);

    double new_obj_1 = 0;
    /* gradient descent: inner for-loop here */
    int inner_iter_1 = 0;
    while (inner_iter_1 < INNER_ITER) {
      /* update variational parameters \hat{h} using gradient descent */
      // As written, the step ADDS lr_1 * gradient to the parameters.
      for (int t = 0; t < T - t0; t++) {
        double[][] h_hat_t = h_hat_s.get(t);
        double[][] grad_h_hat_t = grad_h_hat_s.get(t);
        for (int i = 0; i < n; i++)
          for (int k = 0; k < K; k++) {
            h_hat_t[i][k] += lr_1 * grad_h_hat_t[i][k];
          }
        h_hat_s.set(t, h_hat_t);
      }

      /* update \hat{\mu} and \hat{V}, since both are function of \hat{h} */
      forward1(false, iter);
      backward1(false);

      double obj1 = compute_objective1();
      if (inner_iter_1 % 10 == 0)
        System.out.println("(1) iter = " + inner_iter_1 + ", obj 1 = " + obj1);
      // Objective dropped: shrink the learning rate and stop. new_obj_1 keeps the value from
      // the previous step, and the parameter update just applied is not rolled back.
      if (inner_iter_1 != 0 && obj1 < new_obj_1) {
        lr_1 *= 0.8;
        break;
      }
      new_obj_1 = obj1;
      inner_iter_1 += 1;
    }
    // The inner loop ran to completion without a drop: double the learning rate.
    if (inner_iter_1 == INNER_ITER) lr_1 *= 2;

    /* sample */
    for (int t = 0; t < T - t0; t++) {
      double[][] samples =
          Operations.sample_multivariate_normal(mu_hat_s.get(t), v_hat_s.get(t), N_SAMPLES);
      // Copy into a fresh array so that h_s no longer shares storage with other lists.
      double[][] h_t = new double[n][K];
      for (int i = 0; i < n; i++)
        for (int k = 0; k < K; k++) {
          h_t[i][k] = samples[i][k];
        }
      h_s.set(t, h_t);
    }

    /** impression feature * */
    forward2(true);
    backward2(true);
    // As above, the gradient is computed once and reused by the inner loop.
    compute_gradient2(iter);

    double new_obj_2 = 0;
    /* gradient descent: inner for-loop here */
    int inner_iter_2 = 0;
    while (inner_iter_2 < INNER_ITER) {
      /* update \hat{h}' using gradient descent */
      for (int t = 0; t < T - t0; t++) {
        double[][] h_hat_prime_t = h_hat_prime_s.get(t);
        double[][] grad_h_hat_prime_t = grad_h_hat_prime_s.get(t);
        for (int i = 0; i < n; i++)
          for (int k = 0; k < K; k++) {
            h_hat_prime_t[i][k] += lr_2 * grad_h_hat_prime_t[i][k];
          }
        h_hat_prime_s.set(t, h_hat_prime_t);
      }

      /* update \hat{\mu}' and \hat{V}', since both are function of \hat{h}' */
      forward2(false);
      backward2(false);

      double obj2 = compute_objective2();
      if (inner_iter_2 % 10 == 0)
        System.out.println("(2) iter = " + inner_iter_2 + ", obj 2 = " + obj2);
      // Same learning-rate schedule as in the first part.
      if (inner_iter_2 != 0 && obj2 < new_obj_2) {
        lr_2 *= 0.8;
        break;
      }
      new_obj_2 = obj2;
      inner_iter_2 += 1;
    }
    if (inner_iter_2 == INNER_ITER) lr_2 *= 2;

    /* sample */
    for (int t = 0; t < T - t0; t++) {
      double[][] samples =
          Operations.sample_multivariate_normal(
              mu_hat_prime_s.get(t), v_hat_prime_s.get(t), N_SAMPLES);
      double[][] h_prime_t = new double[n][K];
      for (int i = 0; i < n; i++)
        for (int k = 0; k < K; k++) {
          h_prime_t[i][k] = samples[i][k];
        }
      h_prime_s.set(t, h_prime_t);
    }

    /** output * */
    for (int t = 0; t < T - t0; t++) {
      double[][] h_t = h_s.get(t);
      double[][] h_prime_t = h_prime_s.get(t);
      /* output filename:
       *   ./res/<seed>_<sigma>/h_<time>_<iter>.txt
       */
      // The directory name is built from delta_str; the file index is the absolute time t + t0.
      FileParser.output(
          h_t, "./res/" + seed + "_" + delta_str + "/h_" + (t + t0) + "_" + iter + ".txt");
      FileParser.output(
          h_prime_t,
          "./res/" + seed + "_" + delta_str + "/h_p_" + (t + t0) + "_" + iter + ".txt");
    }

    /* check convergence */
    // Negated relative change of each objective with respect to the previous outer iteration.
    double diff_1 = -(new_obj_1 - old_obj_1) / old_obj_1;
    double diff_2 = -(new_obj_2 - old_obj_2) / old_obj_2;
    if (iter != 0 && diff_1 < 1e-6 && diff_2 < 1e-6) {
      System.out.println("diff_1 = " + diff_1);
      System.out.println("diff_2 = " + diff_2);
      break;
    }
    old_obj_1 = new_obj_1;
    old_obj_2 = new_obj_2;
  }
}
public static void compute_gradient2(int iteration) { double[][][] tmp_grad_h_hat_prime_s = new double[T - t0][n][K]; /* * compute * nti[t][i] = \sum_{j} { n_{ij} } * and * nti_h[t][j][k] = \sum_{i} { n_{ij}^{t} h_{ik}^{t} } */ double[][] nti = new double[T - t0][n]; double[][][] nti_h = new double[T - t0][n][K]; for (int t = 0; t < T - t0; t++) { double[][] G_t = GS.get(t); double[][] h_t = h_s.get(t); // h^{t} for (int i = 0; i < n; i++) for (int j = 0; j < n; j++) { nti[t][i] += G_t[i][j]; for (int k = 0; k < K; k++) { nti_h[t][j][k] += G_t[i][j] * h_t[i][k]; } } } for (int t = 0; t < T - t0; t++) { double delta_t = delta_prime_s.get(t); double[][] h_t = h_s.get(t); // h^{t} double[][] h_hat_prime_t = h_hat_prime_s.get(t); // \hat{h}^{t} double[][] mu_hat_t = mu_hat_s.get(t); // \hat{\mu}^{t} double[][] mu_hat_prime_t = mu_hat_prime_s.get(t); // \hat{\mu}'^{t} double[][] h_prime_t = h_prime_s.get(t); if (t != 0) { Matrix a = new Matrix(AS.get(t - 1)); Matrix hprime_pre_t = new Matrix(h_prime_s.get(t - 1)); Matrix ave_neighbors = a.times(hprime_pre_t); double[][] G_pre_t = GS.get(t - 1); // G^{t-1} double[][] A_pre_t = AS.get(t - 1); // A^{t-1} double[][] h_pre_t = h_s.get(t - 1); // h^{t-1} double[][] mu_hat_prime_pre_t = mu_hat_prime_s.get(t - 1); // \hat{\mu}'^{t-1} [t] for (int s = 0; s < T - t0; s++) { double[][] grad_mu_hat_prime_t = grad_mu_hat_prime_s.get(t * (T - t0) + s); double[][] grad_mu_hat_prime_pre_t = grad_mu_hat_prime_s.get((t - 1) * (T - t0) + s); double[] h2delta2 = new double[n]; for (int i = 0; i < n; i++) for (int k = 0; k < K; k++) { h2delta2[i] += 0.5 * h_t[i][k] * h_t[i][k] * delta_t * delta_t; } /* compute weighted_exp for later use */ double[][][] weighted_exp_num = new double[K][n][n]; double[][] weighted_exp_den = new double[K][n]; double[][][] weighted_exp = new double[K][n][n]; for (int i = 0; i < n; i++) for (int j = 0; j < n; j++) { double h_muhp = Operations.inner_product(h_t[j], mu_hat_prime_t[i], K); for (int k = 0; k < K; 
k++) { weighted_exp_num[k][i][j] = h_t[j][k] * Math.exp(h_muhp + h2delta2[j]); weighted_exp_den[k][j] += Math.exp(h_muhp + h2delta2[j]); } } for (int i = 0; i < n; i++) for (int j = 0; j < n; j++) for (int k = 0; k < K; k++) { weighted_exp[k][i][j] = weighted_exp_num[k][i][j] / weighted_exp_den[k][j]; } /* compute sum_mu_hat_prime for later use */ double[] sum_mu_hat_prime = new double[K]; for (int i = 0; i < n; i++) for (int k = 0; k < K; k++) { sum_mu_hat_prime[k] += mu_hat_prime_pre_t[i][k]; } for (int i = 0; i < n; i++) for (int k = 0; k < K; k++) { /* first term */ double g1 = nti_h[t][i][k] * grad_mu_hat_prime_t[i][k]; tmp_grad_h_hat_prime_s[s][i][k] += g1; /* second term */ double g2 = 0; for (int j = 0; j < n; j++) { g2 -= nti[t][j] * weighted_exp[k][i][j] * grad_mu_hat_prime_t[i][k]; } tmp_grad_h_hat_prime_s[s][i][k] += g2; /* third term */ for (int j = 0; j < n; j++) if (G_pre_t[j][i] != 0) { // double g3 = ( h_t[j][k] - (1-lambda) * h_pre_t[j][k] - lambda * // A_pre_t[j][i] * sum_mu_hat_prime[k] ) double g3 = (h_t[j][k] - (1 - lambda) * h_pre_t[j][k] - lambda * A_pre_t[j][i] * mu_hat_prime_pre_t[i][k]) * lambda * A_pre_t[j][i] * grad_mu_hat_prime_pre_t[i][k] / (sigma * sigma); tmp_grad_h_hat_prime_s[s][j][k] += g3; // j instead of i! 
} } /* fourth term */ for (int i = 0; i < n; i++) for (int k = 0; k < K; k++) { double g4 = -(mu_hat_prime_t[i][k] - mu_hat_prime_pre_t[i][k]) * (grad_mu_hat_prime_t[i][k] - grad_mu_hat_prime_pre_t[i][k]) / (sigma * sigma); tmp_grad_h_hat_prime_s[s][i][k] += g4; } } } else { /* for (int s = 0; s < T-t0; s++) { double[] grad_mu_hat_prime_t = grad_mu_hat_prime_s.get(t * (T-t0) + s); for (int i = 0; i < n; i++) { // first term double g1 = nti_hp[t][i] * grad_mu_hat_prime_t[i]; tmp_grad_h_hat_prime_s[s][i] += g1; // second term double g2 = 0; for (int _j = 0; _j < NEG; _j++) { double weighted_exp_num = 0, weighted_exp_den = 0; int j = neg_samples.get(t)[i][_j]; double htj = h_t[j][0]; double muhti = mu_hat_t[i]; weighted_exp_num += htj * Math.exp(htj * muhti + 0.5 * htj * htj * delta_t * delta_t); for (int _k = 0; _k < NEG; _k++) { int k = neg_samples.get(t)[i][_k]; double muhtk = mu_hat_t[k]; weighted_exp_den += Math.exp(htj * muhtk + 0.5 * htj * htj * delta_t * delta_t); } g2 -= nti[t][j] * weighted_exp_num / weighted_exp_den * grad_mu_hat_prime_t[i]; } tmp_grad_h_hat_prime_s[s][i] += g2; } // fourth term (if any) if (s == t) for (int i = 0; i < n; i++) { double g4 = -h_hat_prime_t[i][0] / (sigma*sigma); tmp_grad_h_hat_prime_s[s][i] += g4; } } */ } } /* update global gradient */ for (int t = 0; t < T - t0; t++) { double[][] grad = new double[n][K]; for (int i = 0; i < n; i++) for (int k = 0; k < K; k++) { grad[i][k] = tmp_grad_h_hat_prime_s[t][i][k]; } grad_h_hat_prime_s.set(t, grad); } FileParser.output_2d(grad_h_hat_prime_s, "./grad/grad_prime_" + iteration + ".txt"); return; }
public static void compute_gradient1(int iteration) { double[][][] tmp_grad_h_hat_s = new double[T - t0][n][K]; for (int t = 0; t < T - t0; t++) { // System.out.println("compute gradient 1, t = " + t); double delta_t = delta_s.get(t); double[][] G_t = GS.get(t); double[][] h_prime_t = h_prime_s.get(t); double[][] mu_hat_t = mu_hat_s.get(t); if (t != 0) { double[][] mu_hat_pre_t = mu_hat_s.get(t - 1); Matrix a = new Matrix(AS.get(t - 1)); Matrix hprime_pre_t = new Matrix(h_prime_s.get(t - 1)); Matrix ave_neighbors = a.times(hprime_pre_t); /* TODO: check whether we can save computation by comparing s and t */ for (int s = 0; s < T - t0; s++) { double[][] grad_hat_t = grad_mu_hat_s.get(t * (T - t0) + s); double[][] grad_hat_pre_t = grad_mu_hat_s.get((t - 1) * (T - t0) + s); double[] hp2delta2 = new double[n]; for (int i = 0; i < n; i++) for (int k = 0; k < K; k++) { hp2delta2[i] += 0.5 * h_prime_t[i][k] * h_prime_t[i][k] * delta_t * delta_t; } for (int i = 0; i < n; i++) { /* first term */ double[] weighted_exp_num = new double[K]; double weighted_exp_den = 0; for (int l = 0; l < n; l++) { double hp_muh = Operations.inner_product(h_prime_t[l], mu_hat_t[i], K); double e = Math.exp(hp_muh + hp2delta2[l]); if (Double.isNaN(e)) { /* check if e explodes */ System.out.println("ERROR2"); Scanner sc = new Scanner(System.in); int gu; gu = sc.nextInt(); } for (int k = 0; k < K; k++) { weighted_exp_num[k] += h_prime_t[l][k] * e; weighted_exp_den += e; } } for (int j = 0; j < n; j++) for (int k = 0; k < K; k++) { double weighted_exp = weighted_exp_num[k] / weighted_exp_den; double gi1 = G_t[i][j] * grad_hat_t[i][k] * (h_prime_t[j][k] - weighted_exp); tmp_grad_h_hat_s[s][i][k] += gi1; } /* second term */ for (int k = 0; k < K; k++) { double gi2 = -(mu_hat_t[i][k] - (1 - lambda) * mu_hat_pre_t[i][k] - lambda * ave_neighbors.get(i, k)) * (grad_hat_t[i][k] - (1 - lambda) * grad_hat_pre_t[i][k]) / (sigma * sigma); tmp_grad_h_hat_s[s][i][k] += gi2; } } } } else { /* no such term (t=0) 
in ELBO */ /* for (int s = 0; s < T-t0; s++) { double[] grad_hat_t = grad_mu_hat_s.get(t * (T-t0) + s); for (int i = 0; i < n; i++) { double n_it = 0; for (int j = 0; j < n; j++) n_it += G_t[i][j]; // first term double gi1 = -mu_hat_t[i] * grad_hat_t[i] / (sigma * sigma); tmp_grad_h_hat_s[s][i] += gi1; // second term double gi2 = 0; double weighted_exp_num = 0, weighted_exp_den = 0; for (int j = 0; j < NEG; j++) { int l = neg_samples.get(t)[i][j]; double hpl = h_prime_t[l][0]; double muit = mu_hat_t[i]; double e = Math.exp(hpl * muit + 0.5 * hpl * hpl * delta_t * delta_t); // TODO: check if e explodes if (Double.isNaN(e)) { System.out.println("ERROR3"); Scanner sc = new Scanner(System.in); int gu; gu = sc.nextInt(); } weighted_exp_num += hpl * e; weighted_exp_den += e; } double weighted_exp = weighted_exp_num / weighted_exp_den; for (int j = 0; j < n; j++) { gi2 += G_t[i][j] * grad_hat_t[i] * (h_prime_t[j][0] - weighted_exp); } tmp_grad_h_hat_s[s][i] += gi2; } } */ } /* end if-else */ } /* update global gradient */ for (int t = 0; t < T - t0; t++) { double[][] grad = new double[n][K]; for (int i = 0; i < n; i++) for (int k = 0; k < K; k++) { grad[i][k] = tmp_grad_h_hat_s[t][i][k]; } grad_h_hat_s.set(t, grad); } FileParser.output_2d(grad_h_hat_s, "./grad/grad_" + iteration + ".txt"); return; }
/**
 * Stores the run options, resolves the chromosome lengths and starts {@code DataProcess()} on a
 * new thread.
 *
 * <p>Chromosome lengths come from a built-in table when {@code genome} is one of {@code hg19},
 * {@code hg18}, {@code mm10}, {@code mm9} or {@code ce10} (case-insensitive); in that case any
 * length file is ignored and {@code lengthFile} is reset to {@code null}. Otherwise, including
 * when {@code genome} is {@code null}, they are read from {@code lengthFilename}.
 *
 * @param lengthFilename path of the chromosome length file; may be {@code null} when a built-in
 *     genome is named
 * @param seqFilename path of the sequence file
 * @param fileType input format tag, stored for {@code DataProcess()}
 * @param numberOfThreads number of threads, stored as given
 * @param fdrCutoff FDR cutoff, stored as given
 * @param numberOfSims number of simulations, stored as given
 * @param windowWidth window width, stored as given
 * @param centerWidth center width, stored as given
 * @param tsCutoff test-statistic cutoff, stored as given
 * @param genome name of a built-in genome, or anything else (or {@code null}) to use the length
 *     file
 * @param verboseQ verbosity flag, stored as given
 * @throws IllegalArgumentException if no built-in genome matches and no length file was given
 */
public NSeqCommand(
    String lengthFilename,
    String seqFilename,
    String fileType,
    int numberOfThreads,
    double fdrCutoff,
    int numberOfSims,
    int windowWidth,
    int centerWidth,
    double tsCutoff,
    String genome,
    boolean verboseQ) {
  // Set options from command line and run
  if (lengthFilename != null) {
    lengthFile = new File(lengthFilename);
  }
  seqFile = new File(seqFilename);
  this.numberOfSims = numberOfSims;
  this.numberOfThreads = numberOfThreads;
  this.windowWidth = windowWidth;
  this.centerWidth = centerWidth;
  this.fdrCutoff = fdrCutoff;
  this.fileType = fileType;
  this.verboseQ = verboseQ;
  this.tsCutoff = tsCutoff;
  setMUE();

  // Constant-first, case-insensitive comparison: a null genome simply matches nothing and
  // falls through to the length-file path instead of throwing a NullPointerException.
  String builtInLabel = null;
  // mm9 and ce10 have always printed an extra newline after the genome length; keep that.
  String lengthSuffix = "";
  if ("hg19".equalsIgnoreCase(genome)) {
    chrLengths = StandardGenomes.hg19();
    builtInLabel = "HG19";
  } else if ("hg18".equalsIgnoreCase(genome)) {
    chrLengths = StandardGenomes.hg18();
    builtInLabel = "HG18";
  } else if ("mm10".equalsIgnoreCase(genome)) {
    chrLengths = StandardGenomes.mm10();
    builtInLabel = "MM10";
  } else if ("mm9".equalsIgnoreCase(genome)) {
    chrLengths = StandardGenomes.mm9();
    builtInLabel = "MM9";
    lengthSuffix = "\n";
  } else if ("ce10".equalsIgnoreCase(genome)) {
    chrLengths = StandardGenomes.ce10();
    builtInLabel = "CE10";
    lengthSuffix = "\n";
  }

  if (builtInLabel != null) {
    FileParser.setOrderedChromosomes(chrLengths);
    consoleOut(builtInLabel + " genome chosen.");
    consoleOut("Total genome length = " + computeGenomeLength() + lengthSuffix);
    lengthFile = null;
  } else {
    if (lengthFile == null) {
      throw new IllegalArgumentException(
          "Genome '" + genome + "' is not built in and no chromosome length file was given.");
    }
    consoleOut("Chromosome Length File: " + lengthFile);
    chrLengths = FileParser.getChromosomeLength(lengthFile.getPath());
    consoleOut("Total genome length = " + computeGenomeLength());
    consoleOut("Using " + numberOfThreads + " threads.");
    consoleOut(
        "Reading data and constructing probability distributions for nucleosome centers...");
  }

  // The processing runs on its own thread; the constructor returns immediately.
  Runnable read =
      new Runnable() {
        public void run() {
          DataProcess();
        }
      };
  new Thread(read).start();
}
private void DataProcess() { long start = System.currentTimeMillis(); NucleosomeDetector nd = null; if (fileType.equals("bed")) { consoleOut("Reading " + seqFile.getPath()); br = new BedFileReader( chrLengths, seqFile.getPath(), 68, NucleosomeCenter.getBetaBins(1.92038028, 1.89366713, 11)); } else if (fileType.equals("sam")) { br = new BamFileReader( chrLengths, seqFile.getPath(), 68, NucleosomeCenter.getBetaBins(1.92038028, 1.89366713, 11)); } else if (fileType.equals("bam")) { br = new BamFileReader( chrLengths, seqFile.getPath(), 68, NucleosomeCenter.getBetaBins(1.92038028, 1.89366713, 11)); } else { br = new BedFileReader( chrLengths, seqFile.getPath(), 68, NucleosomeCenter.getBetaBins(1.92038028, 1.89366713, 11)); } HashMap<String, Chromosome> chrList = br.getChrList(); consoleOut("*** Finished reading. ***"); for (String chr : chrLengths.keySet()) { if (br.getChrList().get(chr).getPosRawList().length == 0 && br.getChrList().get(chr).getPosRawList().length == 0) { br.removeChr(chr); FileParser.removeOrderedChromosome(chr); } } consoleOut("Detecting nucleosomes...."); // consoleOut(windowWidth + " " + centerWidth + " " + numberOfThreads + " " + numberOfSims); nd = new NucleosomeDetector( windowWidth, centerWidth, numberOfThreads, numberOfSims, chrList, MUE, verboseQ); // int intervalSize = Collections.max(chrLengths.values())/nThreadsChrInteger; int intervalSize = Math.max(Collections.min(chrLengths.values()) / numberOfThreads, 100000); // intervalSize = 50000; nucleosomes = nd.computeIntervalWise(tsCutoff, intervalSize); consoleOut("Computing FDR using " + numberOfSims + " simulations..."); nd.computeFDR(tsCutoff, nucleosomes, intervalSize); SimpleDateFormat df = new SimpleDateFormat("MM.dd.yyyy_HH.mm.ss"); String formattedDate = df.format(new Date(System.currentTimeMillis())); String tmp = seqFile.getPath(); if (tmp.lastIndexOf(".txt") == tmp.length() - 4) { tmp = tmp.substring(0, tmp.lastIndexOf(".txt")); } String outFile = tmp + "_NSeq_" + formattedDate + 
".txt"; String outWig = tmp + "_NSeq_" + formattedDate + ".wig"; String info = "# Sequence file: " + seqFile.getPath() + "\n"; if (lengthFile != null) { info += "# Chromosome length file:" + lengthFile.getPath() + "\n"; } int numPassedNuc = 0; try { numPassedNuc = FileOutput.writeNucleosomes( nucleosomes, outFile, ((Double) fdrCutoff).toString(), numberOfSims, info); consoleOut( numPassedNuc + " nucleosomes found at " + ((Double) fdrCutoff).toString() + " FDR."); FileOutput.writeWig(nucleosomes, outWig, ((Double) fdrCutoff).toString()); } catch (IOException ex) { ex.printStackTrace(); } long elapsedTimeMillis = System.currentTimeMillis() - start; double elapsedTimeSec = elapsedTimeMillis / 1000.0; consoleOut("Output written to " + outFile + ""); consoleOut("Time taken: " + Double.toString(elapsedTimeSec)); consoleOut("********** Finished processing. **********"); }