Beispiel #1
0
  /**
   * Parses a downloaded UniSTS data source file and writes the parsed records out for database
   * loading.
   *
   * @param file descriptor of the downloaded source; only the first name in its file list is used
   * @throws FatalException if an I/O error occurs while records are being read
   */
  public void parse(FileInfo file) throws FatalException {
    // This parser handles a single file, so only the first entry of the file list is relevant.
    String sourceName = (String) file.getFiles().firstElement();

    System.out.println("UNISTS parsing started");
    Logger.log(" UniSts::parsing started " + sourceName, Logger.INFO);

    // findFormat classifies the source by its file name (an "alias" keyword marks the alias file).
    // Judging by how the flag is used below, true selects the base UniSTS tables and false the
    // alias table.
    final boolean baseFile = findFormat(sourceName);

    // Register the metadata of every table this run will populate.
    initializeTables(baseFile);
    Logger.log("initilalise tables over", Logger.INFO);

    // One file writer per target table; writeRecordToDb later looks the writer up by table name.
    createFileWriters(baseFile);
    Logger.log("Create file writers over", Logger.INFO);

    // Record objects that will carry the field values of the line currently being parsed.
    createRecords(baseFile);
    Logger.log("create records over", Logger.INFO);

    // The bulk loaders expect table metadata at the top of each data file, so it has to be
    // written before the first record.
    writeMETADATA();
    Logger.log("Write metadata over", Logger.INFO);

    try {
      while ((m_line = getNextRecord()) != null) {
        // Lines starting with '#' are comments: nothing to parse, write or reset.
        if (m_line.startsWith("#")) {
          continue;
        }

        parseLine(baseFile);

        if (baseFile) {
          try {
            writeRecordToDb(Variables.uniStsBaseTableName, uniStsRecord);

            // Field 2 of the accession record holds a ';'-separated list; emit one accession row
            // per token by overwriting that field before each write.
            String joinedAccessions = uniStsAccNoRecord.fields[2].toString();
            StringTokenizer accessions = new StringTokenizer(joinedAccessions, ";");
            while (accessions.hasMoreTokens()) {
              String accession = accessions.nextToken();
              uniStsAccNoRecord.fields[2].setLength(0);
              uniStsAccNoRecord.fields[2].append(accession);
              writeRecordToDb(Variables.uniStsAccessionTableName, uniStsAccNoRecord);
            }
          } catch (InsertException ie) {
            // A failed insert abandons the rest of this record only; parsing carries on.
            Logger.log("Exception in UniSTS parser :" + ie.getMessage(), Logger.DEBUG);
          }
        }

        // The current line is done; clear the record objects for the next one.
        resetRecords(baseFile);
      }
    } catch (IOException ioex) {
      // Reading failed: the remaining records are skipped and the failure is escalated.
      Logger.log("Exception in UniSTS parser :" + ioex.getMessage(), Logger.DEBUG);
      throw new FatalException(ioex.getMessage());
    } finally {
      Logger.log(" UNISTS::parsing over. ", Logger.INFO);
    }

    // Remember the UniSTS revision value destined for the revision_history table.
    Variables.UniSTSRevisionHistory = getFileRevisionHistory(sourceName);
  }
Beispiel #2
0
  /**
   * Parses one downloaded Entrez Gene source file.
   *
   * <p>A file that {@code findFormat} does not classify as gene history is scanned with a JDOM
   * {@code ElementScanner}; when its name does not end in ".xml" it is first converted through
   * {@code ConvertToXml}. A gene-history file is instead read record by record and each record is
   * handed to {@code EntrezParser.parseGeneHistory}.
   *
   * <p>NullPointerException, OutOfMemoryError, JDOMException, SAXException and IOException are
   * not propagated: each one increments {@code Variables.errorCount} and is logged at FATAL level.
   *
   * @param file information about the file to parse; only the first file name in its list is used
   * @exception FatalException declared by the signature; presumably raised by helper methods
   *     called from here (not visible in this block) - the exceptions caught below are logged, not
   *     rethrown
   */
  public void parse(FileInfo file) throws FatalException {
    // This parser deals with a single file, so only the first entry of the file list is used.
    String fileName = (String) file.getFiles().firstElement();

    Logger.log("Starting parsing for " + m_dbType + ": " + fileName, Logger.INFO);
    try {
      long startParsingTime = System.currentTimeMillis();

      // true when the file to be parsed is a Gene-History file
      boolean history = findFormat(fileName);

      if (!history) {
        // JDOM element scanner for the XML file
        ElementScanner xmlScanner = new ElementScanner();
        xmlScanner.setValidation(false);

        // Register the listener for the individual nodes according to the configured dbType.
        if (m_dbType.equalsIgnoreCase(Constants.ENTREZGENE)) {
          if (!fileName.endsWith(".xml")) {
            fileName = ConvertToXml(fileName);
            // Only report a conversion when one actually took place.
            Logger.log("Converted .asn file to XML " + fileName, Logger.WARNING);
          }
          m_parserDataObject = new EntrezParser(file, m_filesParsed, history);

          // Initialise the system_termtree and system_termdata tables.
          startSystemTreeDataFiles();

          ElementListener xmlListener =
              new JDOMXMLListener(m_dbType, fileName, ((EntrezParser) m_parserDataObject));
          // Listen for the 'Entrezgene' node of the XML file.
          xmlScanner.addElementListener(xmlListener, Constants.ENTREZNODE);
        }
        Logger.log(xmlScanner.toString(), Logger.INFO);
        Logger.log("input to xmlscanner " + fileName, Logger.INFO);

        // Scan the input; the listener's elementMatched callback fires for every matching node.
        xmlScanner.parse(new InputSource(fileName));

        // Close all the open file writer streams, then discard the downloaded file.
        m_parserDataObject.closeFileWriters();
        deleteDownloadedFile(file);
      } else {
        // Gene-History file: each record is tokenized and passed to the Entrez parser.
        Logger.log("Parsing started for GENE_HISTORY table", Logger.INFO);
        m_parserDataObject = new EntrezParser(file, m_filesParsed, history);
        EntrezParser historyParser = (EntrezParser) m_parserDataObject;
        while ((m_line = getNextRecord()) != null) {
          StringTokenizer sTok = new StringTokenizer(m_line);
          historyParser.parseGeneHistory(history, sTok);
        }
        m_parserDataObject.closeFileWriters();
      }

      Logger.log("Parsing over for " + m_dbType + ": " + fileName, Logger.INFO);
      long parsingTime = System.currentTimeMillis() - startParsingTime;
      Logger.log("Parsing Time for: " + m_dbType + ": " + parsingTime, Logger.INFO);
    } catch (NullPointerException nexcp) {
      // NOTE(review): every failure below is counted and logged rather than rethrown; this is
      // kept as-is because callers may rely on parse() returning normally.
      Variables.errorCount++;
      Logger.log("NullPointerException has occured: " + nexcp.getMessage(), Logger.FATAL);
    } catch (OutOfMemoryError outexcp) {
      Variables.errorCount++;
      Logger.log("OutOfMemoryError has occured: " + outexcp.getMessage(), Logger.FATAL);
    } catch (JDOMException jexcp) {
      Variables.errorCount++;
      Logger.log("JDOMException has occured: " + jexcp.getMessage(), Logger.FATAL);
    } catch (SAXException saxexcp) {
      Variables.errorCount++;
      Logger.log("SAXException has occured: " + saxexcp.getMessage(), Logger.FATAL);
    } catch (IOException ioexcp) {
      Variables.errorCount++;
      Logger.log("IOException has occured: " + ioexcp.getMessage(), Logger.FATAL);
    }
  }