/**
 * Classifies a UniSTS input file by looking for the substring "alias" in its name.
 *
 * @param fileName name of the file to classify
 * @return {@code true} when the name does not contain "alias" (base UniSTS file), {@code false}
 *     when it does (UniSTS alias file)
 */
private boolean findFormat(String fileName) {
  final boolean aliasFile = fileName.indexOf("alias") >= 0;
  if (aliasFile) {
    Logger.log(" alias = true", Logger.INFO);
  } else {
    Logger.log(" alias = false ", Logger.INFO);
  }
  // The caller works with a "noAlias" flag, hence the negation.
  return !aliasFile;
}
/**
 * Determines whether the given file is a history file by checking whether its name contains
 * {@code Constants.HISTORYFILE}.
 *
 * @param fileName name of the file
 * @return {@code true} if the name contains the history marker (history file), {@code false}
 *     otherwise (normal file)
 */
private boolean findFormat(String fileName) {
  final boolean history = fileName.indexOf(Constants.HISTORYFILE) != -1;
  if (history) {
    Logger.log(" history = true", Logger.DEBUG);
    return true;
  }
  Logger.log(" history = false ", Logger.DEBUG);
  return false;
}
/**
 * Initializes the file writers for the UniSTS data source and registers them in
 * {@code m_fileWriterHashTable}, keyed by table name. Each writer targets a file named
 * {@code tableName + "." + m_fileToParse}.
 *
 * @param noAlias {@code true} when the base UniSTS file is parsed (writers for the base and the
 *     accession tables are created); {@code false} when the UniSTS alias file is parsed (only
 *     the alias table writer is created)
 * @throws FatalException if any of the file writers cannot be opened
 */
private void createFileWriters(boolean noAlias) throws FatalException {
  try {
    if (noAlias) {
      /**
       * If UniSTS.sts file is input then unists base table and unists_accession tables are
       * populated so their file writers are being initialised
       */
      m_fileWriterHashTable.put(
          Variables.uniStsBaseTableName,
          new FileWriter(Variables.uniStsBaseTableName + "." + m_fileToParse));
      m_fileWriterHashTable.put(
          Variables.uniStsAccessionTableName,
          new FileWriter(Variables.uniStsAccessionTableName + "." + m_fileToParse));
    } else {
      /** In case of UniSTS.alias file initialise the corresponding table */
      m_fileWriterHashTable.put(
          Variables.uniStsAliasTableName,
          new FileWriter(Variables.uniStsAliasTableName + "." + m_fileToParse));
    }
  } catch (IOException ioEx) {
    Logger.log(
        "Uniable to initialize file writers (UniSTS parser): " + ioEx.getLocalizedMessage(),
        Logger.INFO);
    // Previously the failure was only logged and parsing went on without writers. Surface it as
    // the FatalException this method already declares and documents.
    throw new FatalException(
        "Unable to initialize file writers (UniSTS parser): " + ioEx.getMessage());
  }
}
/** * Parsing of the Unigene record for 1st,5th,7th and 8th token. The first field indicates STS id. * The fifth (NAME) coresponds to STSNAME. The seventh(ACCESSION NUMBER) & eighth(ORGANISM). For * the UniStsAlias Record we use 1st and 2nd token The first field indicates STS id. The first * field indicates Aliases. * * @param alias boolean to indicate if file being parsed in unSTS.alias */ private void parseLine(boolean noAlias) throws FatalException { m_tokenizer = new StringTokenizer(m_line, m_fieldDelimiter); if (true == noAlias) { /** noAlias = true ---> filename = UNISTS.STS */ if (m_tokenizer.countTokens() >= Constants.uniStsTableCols) { String uniStsElement = null; int count = 0; while (m_tokenizer.hasMoreTokens()) { uniStsElement = m_tokenizer.nextToken(); uniStsElement = uniStsElement.trim(); count++; // ith term if (count == 1) { uniStsRecord.fields[0].append(uniStsElement); uniStsAccNoRecord.fields[0].append(uniStsElement); } else if (count == 5) { uniStsRecord.fields[1].append(uniStsElement); } else if (count == 7) { uniStsAccNoRecord.fields[2].append(uniStsElement); } else if (count == 8) { String localTaxid = (String) Variables.hmOrganismLocalId.get(uniStsElement); uniStsRecord.fields[2].append(localTaxid); uniStsAccNoRecord.fields[1].append(localTaxid); } } } } else { /** noAlias = false ---> filename = UNISTSalias file */ if (m_tokenizer.countTokens() >= Constants.uniStsAliasTableCols) { String uniStsAliasElement = null; int count = 0; /** here we have to add the records with same locus id,different alias. 
*/ while (m_tokenizer.hasMoreTokens()) { uniStsAliasElement = m_tokenizer.nextToken(); uniStsAliasElement = uniStsAliasElement.trim(); count++; // ith term if (count == 1) uniStsAliasRecord.fields[0].append(uniStsAliasElement); else if (count == 2) { StringTokenizer token = new StringTokenizer(uniStsAliasElement, semiColonSeperator); int counttoken = token.countTokens(); if (counttoken != 0) { StringBuffer id = new StringBuffer(uniStsAliasRecord.fields[0].toString()); uniStsAliasRecord.resetAllFields(); while (token.hasMoreElements()) { uniStsAliasRecord.fields[0].append(id.toString()); uniStsAliasRecord.fields[1].append(token.nextElement()); try { writeRecordToDb(Variables.uniStsAliasTableName, uniStsAliasRecord); } catch (InsertException ie) { Logger.log("Exception in UniSTS parser :" + ie.getMessage(), Logger.DEBUG); } uniStsAliasRecord.resetAllFields(); } } else { uniStsAliasRecord.fields[1].append(uniStsAliasElement); try { writeRecordToDb(Variables.uniStsAliasTableName, uniStsAliasRecord); } catch (InsertException ie) { Logger.log("Exception in UniSTS parser :" + ie.getMessage(), Logger.DEBUG); } } resetRecords(false); } } } } }
/**
 * Parses a downloaded UniSTS data source file (either the base file or the alias file) and writes
 * its records through {@code writeRecordToDb}.
 *
 * @param file descriptor of the file to parse; only the first entry of {@code file.getFiles()}
 *     is used
 * @throws FatalException if reading the input fails with an IOException, or if one of the called
 *     helpers throws it
 */
public void parse(FileInfo file) throws FatalException {
  /** The UniSTS parser deals with a single file only, so take the first name from the list. */
  String fileName = (String) file.getFiles().firstElement();
  System.out.println("UNISTS parsing started");
  Logger.log(" UniSts::parsing started " + fileName, Logger.INFO);

  /**
   * The file name decides which tables get populated: a name containing "alias" means the
   * unists_alias table, anything else means the base UniSTS tables. findFormat returns true for
   * the base file.
   */
  final boolean noAlias = findFormat(fileName);

  /** Register the metadata of every table that is going to be used. */
  initializeTables(noAlias);
  Logger.log("initilalise tables over", Logger.INFO);

  /**
   * Open one file writer per populated table; writeRecordToDb later picks the writer for the
   * requested table.
   */
  createFileWriters(noAlias);
  Logger.log("Create file writers over", Logger.INFO);

  /** Create the record objects that hold the field values of the line being parsed. */
  createRecords(noAlias);
  Logger.log("create records over", Logger.INFO);

  /** Write the table metadata into each data file before any record is written. */
  writeMETADATA();
  Logger.log("Write metadata over", Logger.INFO);

  try {
    while ((m_line = getNextRecord()) != null) {
      if (m_line.startsWith("#")) {
        // lines starting with '#' are skipped
        continue;
      }
      parseLine(noAlias);
      if (noAlias) {
        try {
          writeRecordToDb(Variables.uniStsBaseTableName, uniStsRecord);
          // Field 2 may carry several ';'-separated accession numbers: one record for each.
          String allAccessions = uniStsAccNoRecord.fields[2].toString();
          StringTokenizer accessions = new StringTokenizer(allAccessions, ";");
          while (accessions.hasMoreTokens()) {
            String accession = accessions.nextToken();
            uniStsAccNoRecord.fields[2].setLength(0);
            uniStsAccNoRecord.fields[2].append(accession);
            writeRecordToDb(Variables.uniStsAccessionTableName, uniStsAccNoRecord);
          }
        } catch (InsertException ie) {
          Logger.log("Exception in UniSTS parser :" + ie.getMessage(), Logger.DEBUG);
        }
      }
      /** current record is completed, reset various table records */
      resetRecords(noAlias);
    }
  } catch (IOException ioex) {
    /** an I/O error aborts parsing; the remaining records are not processed */
    Logger.log("Exception in UniSTS parser :" + ioex.getMessage(), Logger.DEBUG);
    throw new FatalException(ioex.getMessage());
  } finally {
    Logger.log(" UNISTS::parsing over. ", Logger.INFO);
  }

  /** remember the revision history value of the parsed file */
  Variables.UniSTSRevisionHistory = getFileRevisionHistory(fileName);
}
/**
 * Parses one downloaded file: either an XML (or to-be-converted) data file that is scanned with
 * a JDOM {@code ElementScanner}, or a history file that is read record by record.
 *
 * <p>Which path is taken is decided by {@code findFormat(fileName)}. Failures of type
 * NullPointerException, OutOfMemoryError, JDOMException, SAXException and IOException are caught
 * here: {@code Variables.errorCount} is incremented and the error is logged at FATAL level; they
 * are not rethrown.
 *
 * @param file Information of the file to parse; only the first entry of {@code file.getFiles()}
 *     is used
 * @exception FatalException declared by the signature; no statement in this method throws it
 *     directly (NOTE(review): presumably raised by one of the called helpers - confirm)
 */
public void parse(FileInfo file) throws FatalException {
  /**
   * JDOM parser deals with single file only. So just pick up the first file name from the list of
   * files
   */
  String fileName = (String) file.getFiles().firstElement();
  // NOTE(review): oldFileName is never read in this method.
  String oldFileName = fileName;
  /** JDOM element scanner for the xml file */
  ElementScanner xmlScanner;
  Logger.log("Starting parsing for " + m_dbType + ": " + fileName, Logger.INFO);
  ElementListener xmlListener = null;
  try {
    long startParsingTime = System.currentTimeMillis();
    boolean history = false;
    history = findFormat(fileName);
    /** Check if file to be parsed is a Gene-History file */
    if (false == history) {
      xmlScanner = new ElementScanner();
      xmlScanner.setValidation(false);
      /** add the listener for the individual nodes according to the dbType specified. */
      if (m_dbType.equalsIgnoreCase(Constants.ENTREZGENE)) {
        String convertedFileName = null;
        // Anything that does not already end in ".xml" is run through ConvertToXml first, and
        // the converted name replaces fileName for the rest of the method.
        if (fileName.endsWith(".xml") == false) {
          convertedFileName = ConvertToXml(fileName);
          fileName = convertedFileName;
        } else {
          convertedFileName = fileName;
        }
        // NOTE(review): this message is logged (at WARNING) even when no conversion took place.
        Logger.log("Converted .asn file to XML " + convertedFileName, Logger.WARNING);
        m_parserDataObject = new EntrezParser(file, m_filesParsed, history);
        /** Initialise the system_termtree and system_termdata tables. */
        startSystemTreeDataFiles();
        xmlListener =
            new JDOMXMLListener(
                m_dbType, convertedFileName, ((EntrezParser) m_parserDataObject));
        /** add a listener for node 'Entrezgene' in xml file */
        xmlScanner.addElementListener(xmlListener, Constants.ENTREZNODE);
      }
      Logger.log(xmlScanner.toString(), Logger.INFO);
      Logger.log("input to xmlscanner " + fileName, Logger.INFO);
      /**
       * scan the input file and call elementMatched function whenever the required node is found
       * in the input file
       */
      xmlScanner.parse(new InputSource(fileName));
      /** close all the open fileWriter streams. */
      // NOTE(review): m_parserDataObject is only assigned in the ENTREZGENE branch above; for
      // any other m_dbType this relies on it having been set elsewhere - confirm.
      m_parserDataObject.closeFileWriters();
      deleteDownloadedFile(file);
    }
    /** Or if it is a Gene-History file */
    else {
      Logger.log("Parsing started for GENE_HISTORY table", Logger.INFO);
      m_parserDataObject = new EntrezParser(file, m_filesParsed, history);
      EntrezParser tempParser = (EntrezParser) m_parserDataObject;
      // Each record is tokenized with the default StringTokenizer delimiters and handed to the
      // Entrez parser.
      while ((m_line = getNextRecord()) != null) {
        StringTokenizer sTok = new StringTokenizer(m_line);
        tempParser.parseGeneHistory(history, sTok);
      }
      m_parserDataObject.closeFileWriters();
    }
    Logger.log("Parsing over for " + m_dbType + ": " + fileName, Logger.INFO);
    long endParsingTime = System.currentTimeMillis();
    long parsingTime = endParsingTime - startParsingTime;
    Logger.log("Parsing Time for: " + m_dbType + ": " + parsingTime, Logger.INFO);
  } catch (NullPointerException nexcp) {
    // Every handler below does the same thing: count the error and log it; nothing is rethrown.
    // When an exception occurs, the closeFileWriters() calls in the try block are skipped.
    Variables.errorCount++;
    Logger.log("NullPointerException has occured: " + nexcp.getMessage(), Logger.FATAL);
  } catch (OutOfMemoryError outexcp) {
    Variables.errorCount++;
    Logger.log("OutOfMemoryError has occured: " + outexcp.getMessage(), Logger.FATAL);
  } catch (JDOMException jexcp) {
    Variables.errorCount++;
    Logger.log("JDOMException has occured: " + jexcp.getMessage(), Logger.FATAL);
  } catch (SAXException saxexcp) {
    Variables.errorCount++;
    Logger.log("SAXException has occured: " + saxexcp.getMessage(), Logger.FATAL);
  } catch (IOException ioexcp) {
    Variables.errorCount++;
    Logger.log("IOException has occured: " + ioexcp.getMessage(), Logger.FATAL);
  }
}
/**
 * Converts a gzip-compressed .asn file to XML by running two external programs: {@code gzip -d}
 * to decompress the input, then {@code gene2xml} (looked up in {@code Variables.currentDir}) to
 * write {@code fileName + ".xml"}. The intermediate unzipped file is deleted afterwards.
 *
 * <p>Both commands are started with argument arrays, so file names containing whitespace are
 * passed to the programs as single arguments.
 *
 * @param fileName name of the compressed input file; must contain ".gz"
 * @return path of the newly created .xml file, or {@code null} if the conversion step failed or
 *     the thread was interrupted
 * @throws FileNotFoundException declared for callers; not thrown directly by this method
 * @throws IOException if {@code fileName} contains no ".gz", or if closing the input reader
 *     fails
 */
private String ConvertToXml(String fileName) throws FileNotFoundException, IOException {
  int index = fileName.lastIndexOf(".gz");
  if (index < 0) {
    // substring(0, -1) would throw an unchecked StringIndexOutOfBoundsException that callers do
    // not handle; report the problem through the declared IOException instead.
    throw new IOException("Cannot convert " + fileName + " to XML: file name has no .gz suffix");
  }
  String outFileName = fileName + ".xml";
  String unzippedFileName = fileName.substring(0, index);
  Logger.log("input file name is " + fileName, Logger.INFO);
  Logger.log("unzipped file name is " + unzippedFileName, Logger.INFO);

  // The reader on the input file is closed before gzip is run on that file.
  if (m_inputFileReader != null) {
    m_inputFileReader.close();
  }

  Runtime run = Runtime.getRuntime();

  /** Step 1: unzip. The output file is the file name without the gz extension. */
  try {
    String[] cmdUnzip = new String[] {"gzip", "-d", fileName};
    Logger.log("gzip -d " + fileName, Logger.DEBUG);
    Process unzip = run.exec(cmdUnzip);
    /**
     * Close the Stream of The Process.If the Stream is not closed then it will not allow next
     * process to create new Stream.
     */
    unzip.getInputStream().close();
    unzip.getOutputStream().close();
    int unzipExit = unzip.waitFor();
    Logger.log("process exit val " + unzipExit, Logger.INFO);
    if (unzipExit == 0) {
      Logger.log("successfully unzipped file " + fileName, Logger.INFO);
    } else {
      Logger.log(
          "unzipping file " + fileName + " failed with exit value " + unzipExit, Logger.FATAL);
    }
  } catch (InterruptedException ie) {
    Logger.log("Exception : Unzipping file " + fileName + " interrupted", Logger.FATAL);
    Logger.log(ie.getMessage(), Logger.FATAL);
    // Keep the interruption visible to the caller and do not start the conversion.
    Thread.currentThread().interrupt();
    return null;
  } catch (IOException io) {
    Logger.log("IOException : unzipping file " + fileName + " failed", Logger.FATAL);
    Logger.log(io.getMessage(), Logger.FATAL);
  }

  /** Step 2: run gene2xml on the unzipped file. */
  BufferedReader br = null;
  try {
    String[] cmd =
        new String[] {
          Variables.currentDir + System.getProperty("file.separator") + "gene2xml",
          "-b",
          "-i",
          unzippedFileName,
          "-o",
          outFileName
        };
    StringBuffer cmdText = new StringBuffer();
    for (int i = 0; i < cmd.length; i++) {
      if (i > 0) {
        cmdText.append(' ');
      }
      cmdText.append(cmd[i]);
    }
    Logger.log("converting asn file " + fileName + " to xml file " + outFileName, Logger.INFO);
    System.out.println(cmdText);
    Process gene2xml = run.exec(cmd);

    // Drain the standard output of the process so it cannot block on a full pipe.
    // NOTE(review): the error stream of the process is not consumed here.
    br = new BufferedReader(new InputStreamReader(gene2xml.getInputStream()));
    String errCheck = br.readLine();
    System.out.println(errCheck);
    String line = null;
    while ((line = br.readLine()) != null) {
      System.out.println("inside print output line");
      System.out.println(line);
    }
    int convertExit = gene2xml.waitFor();
    Logger.log("completed asn to xml file convertion", Logger.INFO);
    Logger.log("process exit val " + convertExit, Logger.INFO);
    deleteDownloadedFile(unzippedFileName);
  } catch (IOException io) {
    Logger.log(
        "IOException : Gene to Xml convertion for Entrezegene file " + fileName + " failed",
        Logger.FATAL);
    Logger.log(io.getMessage(), Logger.FATAL);
    outFileName = null;
  } catch (InterruptedException ie) {
    Logger.log(
        "Exception : Gene to Xml convertion for Entrezegene file " + fileName + " interrupted",
        Logger.FATAL);
    Logger.log(ie.getMessage(), Logger.FATAL);
    Thread.currentThread().interrupt();
    outFileName = null;
  } finally {
    // Release the pipe to the child process on every path, including failures.
    if (br != null) {
      try {
        br.close();
      } catch (IOException ignored) {
        // Nothing useful can be done if closing the already-drained stream fails.
        Logger.log(ignored.getMessage(), Logger.DEBUG);
      }
    }
  }
  Logger.log("Converted file name " + outFileName, Logger.INFO);
  return outFileName;
}