/**
 * Parses a downloaded UniSTS data source file.
 *
 * <p>The method first prepares the tables, file writers and record objects for the detected file
 * flavour and calls {@code writeMETADATA()}, then processes the input one record at a time.
 * Records that start with {@code #} are skipped. When the loop ends without an exception, the
 * value returned by {@code getFileRevisionHistory} is stored in
 * {@code Variables.UniSTSRevisionHistory}.
 *
 * @param file descriptor of the download; only the first entry of its file list is parsed
 * @throws FatalException if an {@code IOException} is raised while records are being processed
 */
public void parse(FileInfo file) throws FatalException {
  // This parser handles one file per call: the first name in the list.
  String fileName = (String) file.getFiles().firstElement();
  System.out.println("UNISTS parsing started");
  Logger.log(" UniSts::parsing started " + fileName, Logger.INFO);

  // The flag drives every set-up step below. When it is true, the record loop writes the base
  // and accession tables. According to the original notes, findFormat derives it from whether
  // the file name contains the "alias" keyword.
  boolean noAlias = findFormat(fileName);

  // Register the metadata of every table used for this file flavour.
  initializeTables(noAlias);
  Logger.log("initilalise tables over", Logger.INFO);

  // One file writer per populated table; writeRecordToDb uses them later.
  createFileWriters(noAlias);
  Logger.log("Create file writers over", Logger.INFO);

  // Record objects that hold the field values of the record currently being parsed.
  createRecords(noAlias);
  Logger.log("create records over", Logger.INFO);

  // The metadata has to be in the data files before any record is written to them.
  writeMETADATA();
  Logger.log("Write metadata over", Logger.INFO);

  try {
    while ((m_line = getNextRecord()) != null) {
      // Lines starting with '#' carry no record data.
      if (m_line.startsWith("#")) {
        continue;
      }

      parseLine(noAlias);

      if (noAlias) {
        try {
          writeRecordToDb(Variables.uniStsBaseTableName, uniStsRecord);

          // Field 2 holds a ';'-separated list; emit one accession row per entry by
          // overwriting the field with a single value before each write.
          StringTokenizer accessions =
              new StringTokenizer(uniStsAccNoRecord.fields[2].toString(), ";");
          while (accessions.hasMoreTokens()) {
            String accession = accessions.nextToken();
            uniStsAccNoRecord.fields[2].setLength(0);
            uniStsAccNoRecord.fields[2].append(accession);
            writeRecordToDb(Variables.uniStsAccessionTableName, uniStsAccNoRecord);
          }
        } catch (InsertException insertFailure) {
          // A failed insert is logged and parsing continues with the next record.
          Logger.log("Exception in UniSTS parser :" + insertFailure.getMessage(), Logger.DEBUG);
        }
      }

      // The current record is complete; clear the record objects for the next one.
      resetRecords(noAlias);
    }
  } catch (IOException readFailure) {
    // An I/O failure aborts the run; the remaining records are not processed.
    Logger.log("Exception in UniSTS parser :" + readFailure.getMessage(), Logger.DEBUG);
    throw new FatalException(readFailure.getMessage());
  } finally {
    Logger.log(" UNISTS::parsing over. ", Logger.INFO);
  }

  // Remember the UniSTS last-modified value destined for the revision_history table.
  Variables.UniSTSRevisionHistory = getFileRevisionHistory(fileName);
}
/**
 * Parses a single source file, either as XML through a JDOM element scanner or, when
 * {@code findFormat} reports a Gene-History file, line by line.
 *
 * <p>Only the first entry of the file list is processed. For the XML path with the Entrez Gene
 * database type, a file whose name does not end in {@code .xml} is first converted with
 * {@code ConvertToXml}. On success the file writers are closed and, for the XML path, the
 * downloaded file is deleted.
 *
 * <p>{@code NullPointerException}, {@code OutOfMemoryError}, {@code JDOMException},
 * {@code SAXException} and {@code IOException} are not propagated: each one increments
 * {@code Variables.errorCount} and is logged at FATAL level. If parsing stops early after this
 * call created an {@code EntrezParser}, its file writers are still closed so that they do not
 * stay open.
 *
 * @param file information about the file to parse
 * @exception FatalException declared by the signature; no visible catch clause throws it, so it
 *     can only originate from one of the called helpers
 */
public void parse(FileInfo file) throws FatalException {
  // Only one file is handled per call: the first name in the list.
  String fileName = (String) file.getFiles().firstElement();
  Logger.log("Starting parsing for " + m_dbType + ": " + fileName, Logger.INFO);

  // True from the moment this call creates an EntrezParser until the regular close is
  // attempted; the finally block uses it to release the writers after a failure.
  boolean writersPending = false;
  try {
    long startParsingTime = System.currentTimeMillis();
    boolean history = findFormat(fileName);

    if (false == history) {
      // Regular XML input: scan it with JDOM.
      ElementScanner xmlScanner = new ElementScanner();
      xmlScanner.setValidation(false);

      // Add the listener for the individual nodes according to the configured dbType.
      if (m_dbType.equalsIgnoreCase(Constants.ENTREZGENE)) {
        if (!fileName.endsWith(".xml")) {
          fileName = ConvertToXml(fileName);
          // Logged only when a conversion really took place.
          Logger.log("Converted .asn file to XML " + fileName, Logger.WARNING);
        }
        m_parserDataObject = new EntrezParser(file, m_filesParsed, history);
        writersPending = true;

        // Initialise the system_termtree and system_termdata tables.
        startSystemTreeDataFiles();

        // Listener for the 'Entrezgene' node of the XML file.
        ElementListener xmlListener =
            new JDOMXMLListener(m_dbType, fileName, ((EntrezParser) m_parserDataObject));
        xmlScanner.addElementListener(xmlListener, Constants.ENTREZNODE);
      }
      Logger.log(xmlScanner.toString(), Logger.INFO);
      Logger.log("input to xmlscanner " + fileName, Logger.INFO);

      // Scan the input; the listener is invoked whenever the registered node is found.
      xmlScanner.parse(new InputSource(fileName));

      // Cleared before the call so that a failing close is not retried in finally.
      writersPending = false;
      m_parserDataObject.closeFileWriters();
      deleteDownloadedFile(file);
    } else {
      // Gene-History input: plain text, one record per line.
      Logger.log("Parsing started for GENE_HISTORY table", Logger.INFO);
      m_parserDataObject = new EntrezParser(file, m_filesParsed, history);
      writersPending = true;
      EntrezParser tempParser = (EntrezParser) m_parserDataObject;
      while ((m_line = getNextRecord()) != null) {
        StringTokenizer sTok = new StringTokenizer(m_line);
        tempParser.parseGeneHistory(history, sTok);
      }
      writersPending = false;
      m_parserDataObject.closeFileWriters();
    }

    Logger.log("Parsing over for " + m_dbType + ": " + fileName, Logger.INFO);
    long parsingTime = System.currentTimeMillis() - startParsingTime;
    Logger.log("Parsing Time for: " + m_dbType + ": " + parsingTime, Logger.INFO);
  } catch (NullPointerException nexcp) {
    Variables.errorCount++;
    Logger.log("NullPointerException has occured: " + nexcp.getMessage(), Logger.FATAL);
  } catch (OutOfMemoryError outexcp) {
    Variables.errorCount++;
    Logger.log("OutOfMemoryError has occured: " + outexcp.getMessage(), Logger.FATAL);
  } catch (JDOMException jexcp) {
    Variables.errorCount++;
    Logger.log("JDOMException has occured: " + jexcp.getMessage(), Logger.FATAL);
  } catch (SAXException saxexcp) {
    Variables.errorCount++;
    Logger.log("SAXException has occured: " + saxexcp.getMessage(), Logger.FATAL);
  } catch (IOException ioexcp) {
    Variables.errorCount++;
    Logger.log("IOException has occured: " + ioexcp.getMessage(), Logger.FATAL);
  } finally {
    if (writersPending) {
      // Parsing stopped before the regular close. This is best-effort cleanup, so a failure
      // here is only logged; it must not hide the problem that interrupted parsing.
      try {
        m_parserDataObject.closeFileWriters();
      } catch (Exception closeExcp) {
        Logger.log(
            "Exception while closing file writers: " + closeExcp.getMessage(), Logger.FATAL);
      }
    }
  }
}